/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "toplev.h"
#include "tree-vectorizer.h"
#include "langhooks.h"


/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop-closed SSA form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
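
/* Worked example (editor's illustration, not from the original sources):
   in a loop like

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   the store to a[i] alters memory (it has a vdef), so the function
   above reports it with *relevant = vect_used_in_scope.  A value that
   is computed in the loop and read after it, e.g. the final value of a
   summation, reaches this function through a loop-exit phi and is
   reported with *live_p = true instead.  */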


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT. Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT. If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref. FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
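
/* Illustration (editor's example): for the stmt "a[i] = x", form -1-
   above, the use of 'x' is the copied rhs, so this function returns
   true for USE == x; the use of 'i' occurs only inside the ARRAY_REF
   (array indexing), so for USE == i the function returns false.  */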


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE. This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected. Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p);
  return true;
}
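
/* Sketch of case 3a (editor's illustration): given

        outer-loop:
                d = b[i]        <-- def_stmt
                inner-loop:
                        ... use (d) ...

   the def is in the outer loop while the use is in the inner one, so
   the first switch above translates the inner-loop relevance before
   the def is pushed on the worklist, e.g. vect_used_in_outer becomes
   plain vect_used_in_scope for the outer-loop def.  */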


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process worklist.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation. This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "unsupported use of reduction.");

                  VEC_free (gimple, heap, worklist);
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          tree op = USE_FROM_PTR (use_p);
          if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
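
/* Example of the reduction handling above (editor's illustration): for

     for (i...)
       sum = sum + a[i];

   the stmt computing 'sum' has def_type vect_reduction_def, so when it
   is popped from the worklist its uses are marked with live_p = false
   and relevant = vect_used_by_reduction.  Stmts feeding only the
   reduction can therefore be treated more freely later on, since only
   the final value of the sum matters, not the order of the partial
   results.  */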


/* Return the cost of the scalar version of STMT, based on the kind of
   vectorizable operation it was classified as.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
  {
  case load_vec_info_type:
    return TARG_SCALAR_LOAD_COST;
  case store_vec_info_type:
    return TARG_SCALAR_STORE_COST;
  case op_vec_info_type:
  case condition_vec_info_type:
  case assignment_vec_info_type:
  case reduc_vec_info_type:
  case induc_vec_info_type:
  case type_promotion_vec_info_type:
  case type_demotion_vec_info_type:
  case type_conversion_vec_info_type:
  case call_vec_info_type:
    return TARG_SCALAR_STMT_COST;
  case undef_vec_info_type:
  default:
    gcc_unreachable ();
  }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * TARG_VEC_STMT_COST;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += TARG_SCALAR_TO_VEC_COST;
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if it exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
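
/* Worked example (editor's illustration): for "z = x + y" with
   ncopies = 2, where 'x' is defined in the loop and 'y' is
   loop-invariant (vect_external_def), the recorded costs are
   inside_cost = 2 * TARG_VEC_STMT_COST and
   outside_cost = TARG_SCALAR_TO_VEC_COST, the latter for splatting 'y'
   into a vector once before the loop.  */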


/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return DR_GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt, slp_tree slp_node)
{
  int group_size;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = TARG_SCALAR_TO_VEC_COST;

  /* Strided access?  */
  if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
    group_size = vect_cost_strided_group_size (stmt_info);
  /* Not a strided access.  */
  else
    group_size = 1;

  /* Is this an access in a group of stores that provide strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
             * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  inside_cost += ncopies * TARG_VEC_STORE_COST;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if it exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
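
/* Worked example (editor's illustration): for the first store of an
   interleaved group with group_size = 4 and ncopies = 1, the permutes
   cost 1 * log2(4) * 4 = 8 vector stmts, giving
   inside_cost = 8 * TARG_VEC_STMT_COST + TARG_VEC_STORE_COST.  The
   other stores of the group get group_size = 1 from
   vect_cost_strided_group_size and are charged only their stores.  */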


/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
{
  int group_size;
  int alignment_support_scheme;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  if (first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr);

  /* Is this an access in a group of loads providing strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
        * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        inside_cost += ncopies * TARG_VEC_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          inside_cost += TARG_VEC_STMT_COST;

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop. However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group. Inside the loop, there is a load op
           and a realignment op.  */

        if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
          {
            outside_cost = 2*TARG_VEC_STMT_COST;
            if (targetm.vectorize.builtin_mask_for_load)
              outside_cost += TARG_VEC_STMT_COST;
          }

        inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        break;
      }

    default:
      gcc_unreachable ();
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if it exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
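
/* Worked example (editor's illustration): a plain unaligned load that
   the hardware supports, with ncopies = 2, costs
   inside_cost = 2 * TARG_VEC_UNALIGNED_LOAD_COST.  Under
   dr_explicit_realign_optimized the setup (address computation, priming
   load, and optionally a mask) is charged to outside_cost (once per
   group, per the condition above), while each copy inside the loop pays
   TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST.  */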


/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR. Place the initialization at BSI if it
   is not NULL. Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT. This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def =  ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt =  ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vec_cst = build_vector (vector_type, t);
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop.  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
        vec_oprnd = PHI_RESULT (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
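
/* Example for case 1 above (editor's illustration): when vectorizing
   "x + 3" with a 4-element vector type, the loop over nunits builds the
   list {3,3,3,3}, build_vector turns it into a vector constant, and
   vect_init_vector materializes it, placing the init stmt in the loop
   preheader because GSI is NULL.  */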


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand. This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required. In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt. This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2. For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt. See vect_get_vec_def_for_stmt_copy() for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1 (or from SLP_NODE if it is
   not NULL).  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}


/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
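
/* Example (editor's illustration, target dependent): for a call to a
   const math builtin such as sqrtf in a loop over floats, a target's
   builtin_vectorized_function hook may return the decl of a builtin
   computing four square roots at once; vectorizable_call below then
   emits calls to that decl instead of the scalar one.  */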

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type, lhs_type;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than two arguments; we do not
     have interesting builtin functions to vectorize with more than two
     arguments.  Having no arguments is also not good.  */
  if (nargs == 0 || nargs > 2)
    return false;

  for (i = 0; i < nargs; i++)
    {
      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt[i]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  vectype_in = get_vectype_for_scalar_type (rhs_type);
  if (!vectype_in)
    return false;
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);

  lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
  vectype_out = get_vectype_for_scalar_type (lhs_type);
  if (!vectype_out)
    return false;
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);

  /* FORNOW */
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform operation.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  /* Continue the chain from the last vector def produced
                     in the previous copy (the second operand for ARG I).  */
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in DCE.  We,
     however, cannot remove it here, due to the way the SSA name it
     defines is mapped to the new definition.  So just replace the rhs
     of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                  fold_convert (type, integer_zero_node));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
   VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support.  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support.  */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
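
/* Example (editor's illustration): a widening operation such as a
   multiply of two vectors of eight shorts into vectors of four ints is
   emitted as two such halves, one combining the high elements and one
   the low elements; this helper builds one half per call.  */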
1520
 
1521
 
1522
/* Check if STMT performs a conversion operation that can be vectorized.
1524
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1525
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1525
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1526
 
1527
static bool
1528
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1529
                         gimple *vec_stmt, slp_tree slp_node)
1530
{
1531
  tree vec_dest;
1532
  tree scalar_dest;
1533
  tree op0;
1534
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1535
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1536
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1537
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1538
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1539
  tree new_temp;
1540
  tree def;
1541
  gimple def_stmt;
1542
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1543
  gimple new_stmt = NULL;
1544
  stmt_vec_info prev_stmt_info;
1545
  int nunits_in;
1546
  int nunits_out;
1547
  tree vectype_out, vectype_in;
1548
  int ncopies, j;
1549
  tree rhs_type, lhs_type;
1550
  tree builtin_decl;
1551
  enum { NARROW, NONE, WIDEN } modifier;
1552
  int i;
1553
  VEC(tree,heap) *vec_oprnds0 = NULL;
1554
  tree vop0;
1555
  tree integral_type;
1556
  VEC(tree,heap) *dummy = NULL;
1557
  int dummy_int;
1558
 
1559
  /* Is STMT a vectorizable conversion?   */
1560
 
1561
  /* FORNOW: unsupported in basic block SLP.  */
1562
  gcc_assert (loop_vinfo);
1563
 
1564
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1565
    return false;
1566
 
1567
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1568
    return false;
1569
 
1570
  if (!is_gimple_assign (stmt))
1571
    return false;
1572
 
1573
  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1574
    return false;
1575
 
1576
  code = gimple_assign_rhs_code (stmt);
1577
  if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1578
    return false;
1579
 
1580
  /* Check types of lhs and rhs.  */
1581
  op0 = gimple_assign_rhs1 (stmt);
1582
  rhs_type = TREE_TYPE (op0);
1583
  vectype_in = get_vectype_for_scalar_type (rhs_type);
1584
  if (!vectype_in)
1585
    return false;
1586
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1587
 
1588
  scalar_dest = gimple_assign_lhs (stmt);
1589
  lhs_type = TREE_TYPE (scalar_dest);
1590
  vectype_out = get_vectype_for_scalar_type (lhs_type);
1591
  if (!vectype_out)
1592
    return false;
1593
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1594
 
1595
  /* FORNOW */
1596
  if (nunits_in == nunits_out / 2)
1597
    modifier = NARROW;
1598
  else if (nunits_out == nunits_in)
1599
    modifier = NONE;
1600
  else if (nunits_out == nunits_in / 2)
1601
    modifier = WIDEN;
1602
  else
1603
    return false;
1604
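
/* Worked example (editor's illustration, assuming 128-bit vectors):
   a double -> int conversion has V2DF input (nunits_in == 2) and V4SI
   output (nunits_out == 4), so modifier == NARROW; int -> double is the
   mirror image and gets WIDEN; float -> int with V4SF/V4SI gets NONE.  */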
 
1605
  if (modifier == NONE)
1606
    gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
1607
 
1608
  /* Bail out if the types are both integral or non-integral.  */
1609
  if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
1610
      || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
1611
    return false;
1612
 
1613
  integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
1614
 
1615
  if (modifier == NARROW)
1616
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1617
  else
1618
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1619
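
/* E.g. (editor's sketch): with a vectorization factor of 8, a NARROW
   conversion producing V4SI needs ncopies = 8 / 4 = 2 copies of the
   vector stmt, and a NONE or WIDEN conversion consuming V4SI likewise
   needs 8 / 4 = 2.  */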
 
1620
  /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1621
     this, so we can safely override NCOPIES with 1 here.  */
1622
  if (slp_node)
1623
    ncopies = 1;
1624
 
1625
  /* Sanity check: make sure that at least one copy of the vectorized stmt
1626
     needs to be generated.  */
1627
  gcc_assert (ncopies >= 1);
1628
 
1629
  /* Check the operands of the operation.  */
1630
  if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
1631
    {
1632
      if (vect_print_dump_info (REPORT_DETAILS))
1633
        fprintf (vect_dump, "use not simple.");
1634
      return false;
1635
    }
1636
 
1637
  /* Supportable by target?  */
1638
  if ((modifier == NONE
1639
       && !targetm.vectorize.builtin_conversion (code, integral_type))
1640
      || (modifier == WIDEN
1641
          && !supportable_widening_operation (code, stmt, vectype_in,
1642
                                              &decl1, &decl2,
1643
                                              &code1, &code2,
1644
                                              &dummy_int, &dummy))
1645
      || (modifier == NARROW
1646
          && !supportable_narrowing_operation (code, stmt, vectype_in,
1647
                                               &code1, &dummy_int, &dummy)))
1648
    {
1649
      if (vect_print_dump_info (REPORT_DETAILS))
1650
        fprintf (vect_dump, "conversion not supported by target.");
1651
      return false;
1652
    }
1653
 
1654
  if (modifier != NONE)
1655
    {
1656
      STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1657
      /* FORNOW: SLP not supported.  */
1658
      if (STMT_SLP_TYPE (stmt_info))
1659
        return false;
1660
    }
1661
 
1662
  if (!vec_stmt)                /* transformation not required.  */
1663
    {
1664
      STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1665
      return true;
1666
    }
1667
 
1668
  /** Transform.  **/
1669
  if (vect_print_dump_info (REPORT_DETAILS))
1670
    fprintf (vect_dump, "transform conversion.");
1671
 
1672
  /* Handle def.  */
1673
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1674
 
1675
  if (modifier == NONE && !slp_node)
1676
    vec_oprnds0 = VEC_alloc (tree, heap, 1);
1677
 
1678
  prev_stmt_info = NULL;
1679
  switch (modifier)
1680
    {
1681
    case NONE:
1682
      for (j = 0; j < ncopies; j++)
1683
        {
1684
          if (j == 0)
1685
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1686
          else
1687
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1688
 
1689
          builtin_decl =
1690
            targetm.vectorize.builtin_conversion (code, integral_type);
1691
          for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1692
            {
1693
              /* Arguments are ready.  Create the new vector stmt.  */
1694
              new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1695
              new_temp = make_ssa_name (vec_dest, new_stmt);
1696
              gimple_call_set_lhs (new_stmt, new_temp);
1697
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
1698
              if (slp_node)
1699
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1700
            }
1701
 
1702
          if (j == 0)
1703
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1704
          else
1705
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1706
          prev_stmt_info = vinfo_for_stmt (new_stmt);
1707
        }
1708
      break;
1709
 
1710
    case WIDEN:
1711
      /* In case the vectorization factor (VF) is bigger than the number
1712
         of elements that we can fit in a vectype (nunits), we have to
1713
         generate more than one vector stmt - i.e., we need to "unroll"
1714
         the vector stmt by a factor VF/nunits.  */
1715
      for (j = 0; j < ncopies; j++)
1716
        {
1717
          if (j == 0)
1718
            vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1719
          else
1720
            vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1721
 
1722
          STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1723
 
1724
          /* Generate first half of the widened result:  */
1725
          new_stmt
1726
            = vect_gen_widened_results_half (code1, decl1,
1727
                                             vec_oprnd0, vec_oprnd1,
1728
                                             unary_op, vec_dest, gsi, stmt);
1729
          if (j == 0)
1730
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1731
          else
1732
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1733
          prev_stmt_info = vinfo_for_stmt (new_stmt);
1734
 
1735
          /* Generate second half of the widened result:  */
1736
          new_stmt
1737
            = vect_gen_widened_results_half (code2, decl2,
1738
                                             vec_oprnd0, vec_oprnd1,
1739
                                             unary_op, vec_dest, gsi, stmt);
1740
          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1741
          prev_stmt_info = vinfo_for_stmt (new_stmt);
1742
        }
1743
      break;
1744
 
1745
    case NARROW:
1746
      /* In case the vectorization factor (VF) is bigger than the number
1747
         of elements that we can fit in a vectype (nunits), we have to
1748
         generate more than one vector stmt - i.e., we need to "unroll"
1749
         the vector stmt by a factor VF/nunits.  */
1750
      for (j = 0; j < ncopies; j++)
1751
        {
1752
          /* Handle uses.  */
1753
          if (j == 0)
1754
            {
1755
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1756
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1757
            }
1758
          else
1759
            {
1760
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1761
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1762
            }
1763
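
          /* Editor's illustration, not in the original source: the defs
             advance by two per copy; copy 0 packs (d0, d1), copy 1 packs
             (d2, d3), and so on, since each narrowed result consumes two
             input vectors.  */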
 
1764
          /* Arguments are ready. Create the new vector stmt.  */
1765
          new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1766
                                                   vec_oprnd1);
1767
          new_temp = make_ssa_name (vec_dest, new_stmt);
1768
          gimple_assign_set_lhs (new_stmt, new_temp);
1769
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
1770
 
1771
          if (j == 0)
1772
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1773
          else
1774
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1775
 
1776
          prev_stmt_info = vinfo_for_stmt (new_stmt);
1777
        }
1778
 
1779
      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1780
    }
1781
 
1782
  if (vec_oprnds0)
1783
    VEC_free (tree, heap, vec_oprnds0);
1784
 
1785
  return true;
1786
}
1787
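
/* Editor's note (a sketch under the conventions of this file, not part
   of the original source): like its sibling routines, the function above
   is used in two phases, roughly

       if (!vectorizable_conversion (stmt, NULL, NULL, node))
         ...;                        analysis: feasibility check only
       ...
       vectorizable_conversion (stmt, gsi, &vec_stmt, node);
                                      transform: emit the vector stmts

   the analysis call (VEC_STMT == NULL) just records
   type_conversion_vec_info_type in STMT_VINFO_TYPE.  */
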
/* Function vectorizable_assignment.
1788
 
1789
   Check if STMT performs an assignment (copy) that can be vectorized.
1790
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1791
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1792
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1793
 
1794
static bool
1795
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1796
                         gimple *vec_stmt, slp_tree slp_node)
1797
{
1798
  tree vec_dest;
1799
  tree scalar_dest;
1800
  tree op;
1801
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1802
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1803
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1804
  tree new_temp;
1805
  tree def;
1806
  gimple def_stmt;
1807
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1808
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1809
  int ncopies;
1810
  int i, j;
1811
  VEC(tree,heap) *vec_oprnds = NULL;
1812
  tree vop;
1813
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1814
  gimple new_stmt = NULL;
1815
  stmt_vec_info prev_stmt_info = NULL;
1816
  enum tree_code code;
1817
  tree vectype_in, vectype_out;
1818
 
1819
  /* Multiple types in SLP are handled by creating the appropriate number of
1820
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1821
     case of SLP.  */
1822
  if (slp_node)
1823
    ncopies = 1;
1824
  else
1825
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1826
 
1827
  gcc_assert (ncopies >= 1);
1828
 
1829
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1830
    return false;
1831
 
1832
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1833
    return false;
1834
 
1835
  /* Is vectorizable assignment?  */
1836
  if (!is_gimple_assign (stmt))
1837
    return false;
1838
 
1839
  scalar_dest = gimple_assign_lhs (stmt);
1840
  if (TREE_CODE (scalar_dest) != SSA_NAME)
1841
    return false;
1842
 
1843
  code = gimple_assign_rhs_code (stmt);
1844
  if (gimple_assign_single_p (stmt)
1845
      || code == PAREN_EXPR
1846
      || CONVERT_EXPR_CODE_P (code))
1847
    op = gimple_assign_rhs1 (stmt);
1848
  else
1849
    return false;
1850
 
1851
  if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
1852
    {
1853
      if (vect_print_dump_info (REPORT_DETAILS))
1854
        fprintf (vect_dump, "use not simple.");
1855
      return false;
1856
    }
1857
 
1858
  /* We can handle NOP_EXPR conversions that do not change the number
1859
     of elements or the vector size.  */
1860
  vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
1861
  vectype_out
1862
    = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
1863
  if (CONVERT_EXPR_CODE_P (code)
1864
      && (!vectype_in
1865
          || !vectype_out
1866
          || (TYPE_VECTOR_SUBPARTS (vectype_out)
1867
              != TYPE_VECTOR_SUBPARTS (vectype_in))
1868
          || (GET_MODE_SIZE (TYPE_MODE (vectype_out))
1869
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
1870
    return false;
1871
 
1872
  if (!vec_stmt) /* transformation not required.  */
1873
    {
1874
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1875
      if (vect_print_dump_info (REPORT_DETAILS))
1876
        fprintf (vect_dump, "=== vectorizable_assignment ===");
1877
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1878
      return true;
1879
    }
1880
 
1881
  /** Transform.  **/
1882
  if (vect_print_dump_info (REPORT_DETAILS))
1883
    fprintf (vect_dump, "transform assignment.");
1884
 
1885
  /* Handle def.  */
1886
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1887
 
1888
  /* Handle use.  */
1889
  for (j = 0; j < ncopies; j++)
1890
    {
1891
      /* Handle uses.  */
1892
      if (j == 0)
1893
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1894
      else
1895
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
1896
 
1897
      /* Arguments are ready.  Create the new vector stmt.  */
1898
      for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1899
       {
1900
         if (CONVERT_EXPR_CODE_P (code))
1901
           vop = build1 (VIEW_CONVERT_EXPR, vectype_out, vop);
1902
         new_stmt = gimple_build_assign (vec_dest, vop);
1903
         new_temp = make_ssa_name (vec_dest, new_stmt);
1904
         gimple_assign_set_lhs (new_stmt, new_temp);
1905
         vect_finish_stmt_generation (stmt, new_stmt, gsi);
1906
         if (slp_node)
1907
           VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1908
       }
1909
 
1910
      if (slp_node)
1911
        continue;
1912
 
1913
      if (j == 0)
1914
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1915
      else
1916
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1917
 
1918
      prev_stmt_info = vinfo_for_stmt (new_stmt);
1919
    }
1920
 
1921
  VEC_free (tree, heap, vec_oprnds);
1922
  return true;
1923
}
1924
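
/* Editor's example (sketch): a same-size conversion such as

       u_5 = (unsigned int) i_4;

   passes the checks above and is vectorized as

       vect_u = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect_i);

   since neither the number of elements nor the vector size changes.  */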
 
1925
/* Function vectorizable_operation.
1926
 
1927
   Check if STMT performs a binary or unary operation that can be vectorized.
1928
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1929
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1930
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1931
 
1932
static bool
1933
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
1934
                        gimple *vec_stmt, slp_tree slp_node)
1935
{
1936
  tree vec_dest;
1937
  tree scalar_dest;
1938
  tree op0, op1 = NULL;
1939
  tree vec_oprnd1 = NULL_TREE;
1940
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1941
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1942
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1943
  enum tree_code code;
1944
  enum machine_mode vec_mode;
1945
  tree new_temp;
1946
  int op_type;
1947
  optab optab;
1948
  int icode;
1949
  enum machine_mode optab_op2_mode;
1950
  tree def;
1951
  gimple def_stmt;
1952
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1953
  gimple new_stmt = NULL;
1954
  stmt_vec_info prev_stmt_info;
1955
  int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1956
  int nunits_out;
1957
  tree vectype_out;
1958
  int ncopies;
1959
  int j, i;
1960
  VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1961
  tree vop0, vop1;
1962
  unsigned int k;
1963
  bool scalar_shift_arg = false;
1964
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1965
  int vf;
1966
 
1967
  if (loop_vinfo)
1968
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1969
  else
1970
    vf = 1;
1971
 
1972
  /* Multiple types in SLP are handled by creating the appropriate number of
1973
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1974
     case of SLP.  */
1975
  if (slp_node)
1976
    ncopies = 1;
1977
  else
1978
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1979
 
1980
  gcc_assert (ncopies >= 1);
1981
 
1982
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1983
    return false;
1984
 
1985
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1986
    return false;
1987
 
1988
  /* Is STMT a vectorizable binary/unary operation?   */
1989
  if (!is_gimple_assign (stmt))
1990
    return false;
1991
 
1992
  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1993
    return false;
1994
 
1995
  scalar_dest = gimple_assign_lhs (stmt);
1996
  vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1997
  if (!vectype_out)
1998
    return false;
1999
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2000
  if (nunits_out != nunits_in)
2001
    return false;
2002
 
2003
  code = gimple_assign_rhs_code (stmt);
2004
 
2005
  /* For pointer addition, we should use the normal plus for
2006
     the vector addition.  */
2007
  if (code == POINTER_PLUS_EXPR)
2008
    code = PLUS_EXPR;
2009
 
2010
  /* Support only unary or binary operations.  */
2011
  op_type = TREE_CODE_LENGTH (code);
2012
  if (op_type != unary_op && op_type != binary_op)
2013
    {
2014
      if (vect_print_dump_info (REPORT_DETAILS))
2015
        fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
2016
      return false;
2017
    }
2018
 
2019
  op0 = gimple_assign_rhs1 (stmt);
2020
  if (!vect_is_simple_use (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
2021
    {
2022
      if (vect_print_dump_info (REPORT_DETAILS))
2023
        fprintf (vect_dump, "use not simple.");
2024
      return false;
2025
    }
2026
 
2027
  if (op_type == binary_op)
2028
    {
2029
      op1 = gimple_assign_rhs2 (stmt);
2030
      if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2031
                               &dt[1]))
2032
        {
2033
          if (vect_print_dump_info (REPORT_DETAILS))
2034
            fprintf (vect_dump, "use not simple.");
2035
          return false;
2036
        }
2037
    }
2038
 
2039
  /* If this is a shift/rotate, determine whether the shift amount is a vector,
2040
     or scalar.  If the shift/rotate amount is a vector, use the vector/vector
2041
     shift optabs.  */
2042
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2043
      || code == RROTATE_EXPR)
2044
    {
2045
      /* vector shifted by vector */
2046
      if (dt[1] == vect_internal_def)
2047
        {
2048
          optab = optab_for_tree_code (code, vectype, optab_vector);
2049
          if (vect_print_dump_info (REPORT_DETAILS))
2050
            fprintf (vect_dump, "vector/vector shift/rotate found.");
2051
        }
2052
 
2053
      /* See if the machine has a vector shifted by scalar insn, and if not,
2054
         then see if it has a vector shifted by vector insn.  */
2055
      else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
2056
        {
2057
          optab = optab_for_tree_code (code, vectype, optab_scalar);
2058
          if (optab
2059
              && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2060
                  != CODE_FOR_nothing))
2061
            {
2062
              scalar_shift_arg = true;
2063
              if (vect_print_dump_info (REPORT_DETAILS))
2064
                fprintf (vect_dump, "vector/scalar shift/rotate found.");
2065
            }
2066
          else
2067
            {
2068
              optab = optab_for_tree_code (code, vectype, optab_vector);
2069
              if (optab
2070
                  && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2071
                      != CODE_FOR_nothing))
2072
                {
2073
                  if (vect_print_dump_info (REPORT_DETAILS))
2074
                    fprintf (vect_dump, "vector/vector shift/rotate found.");
2075
 
2076
                  /* Unlike the other binary operators, shifts/rotates have
2077
                     the rhs being int, instead of the same type as the lhs,
2078
                     so make sure the scalar is the right type if we are
2079
                     dealing with vectors of short/char.  */
2080
                  if (dt[1] == vect_constant_def)
2081
                    op1 = fold_convert (TREE_TYPE (vectype), op1);
2082
                }
2083
            }
2084
        }
2085
 
2086
      else
2087
        {
2088
          if (vect_print_dump_info (REPORT_DETAILS))
2089
            fprintf (vect_dump, "operand mode requires invariant argument.");
2090
          return false;
2091
        }
2092
    }
2093
  else
2094
    optab = optab_for_tree_code (code, vectype, optab_default);
2095
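
  /* Editor's note (sketch): for a shift like `x << 3' with a constant
     amount, the vector/scalar optab is preferred, since one scalar count
     serves all elements; for `x << y' with Y defined inside the loop,
     only the vector/vector optab can work, because each element may be
     shifted by a different amount.  */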
 
2096
  /* Supportable by target?  */
2097
  if (!optab)
2098
    {
2099
      if (vect_print_dump_info (REPORT_DETAILS))
2100
        fprintf (vect_dump, "no optab.");
2101
      return false;
2102
    }
2103
  vec_mode = TYPE_MODE (vectype);
2104
  icode = (int) optab_handler (optab, vec_mode)->insn_code;
2105
  if (icode == CODE_FOR_nothing)
2106
    {
2107
      if (vect_print_dump_info (REPORT_DETAILS))
2108
        fprintf (vect_dump, "op not supported by target.");
2109
      /* Check only during analysis.  */
2110
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2111
          || (vf < vect_min_worthwhile_factor (code)
2112
              && !vec_stmt))
2113
        return false;
2114
      if (vect_print_dump_info (REPORT_DETAILS))
2115
        fprintf (vect_dump, "proceeding using word mode.");
2116
    }
2117
 
2118
  /* Worthwhile without SIMD support? Check only during analysis.  */
2119
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2120
      && vf < vect_min_worthwhile_factor (code)
2121
      && !vec_stmt)
2122
    {
2123
      if (vect_print_dump_info (REPORT_DETAILS))
2124
        fprintf (vect_dump, "not worthwhile without SIMD support.");
2125
      return false;
2126
    }
2127
 
2128
  if (!vec_stmt) /* transformation not required.  */
2129
    {
2130
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2131
      if (vect_print_dump_info (REPORT_DETAILS))
2132
        fprintf (vect_dump, "=== vectorizable_operation ===");
2133
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2134
      return true;
2135
    }
2136
 
2137
  /** Transform.  **/
2138
 
2139
  if (vect_print_dump_info (REPORT_DETAILS))
2140
    fprintf (vect_dump, "transform binary/unary operation.");
2141
 
2142
  /* Handle def.  */
2143
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
2144
 
2145
  /* Allocate VECs for vector operands. In case of SLP, vector operands are
2146
     created in the previous stages of the recursion, so no allocation is
2147
     needed, except for the case of shift with scalar shift argument. In that
2148
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2149
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2150
     In case of loop-based vectorization we allocate VECs of size 1. We
2151
     allocate VEC_OPRNDS1 only in case of binary operation.  */
2152
  if (!slp_node)
2153
    {
2154
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
2155
      if (op_type == binary_op)
2156
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
2157
    }
2158
  else if (scalar_shift_arg)
2159
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2160
 
2161
  /* In case the vectorization factor (VF) is bigger than the number
2162
     of elements that we can fit in a vectype (nunits), we have to generate
2163
     more than one vector stmt - i.e., we need to "unroll" the
2164
     vector stmt by a factor VF/nunits. In doing so, we record a pointer
2165
     from one copy of the vector stmt to the next, in the field
2166
     STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2167
     stages to find the correct vector defs to be used when vectorizing
2168
     stmts that use the defs of the current stmt. The example below illustrates
2169
     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2170
     4 vectorized stmts):
2171
 
2172
     before vectorization:
2173
                                RELATED_STMT    VEC_STMT
2174
        S1:     x = memref      -               -
2175
        S2:     z = x + 1       -               -
2176
 
2177
     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2178
             there):
2179
                                RELATED_STMT    VEC_STMT
2180
        VS1_0:  vx0 = memref0   VS1_1           -
2181
        VS1_1:  vx1 = memref1   VS1_2           -
2182
        VS1_2:  vx2 = memref2   VS1_3           -
2183
        VS1_3:  vx3 = memref3   -               -
2184
        S1:     x = load        -               VS1_0
2185
        S2:     z = x + 1       -               -
2186
 
2187
     step2: vectorize stmt S2 (done here):
2188
        To vectorize stmt S2 we first need to find the relevant vector
2189
        def for the first operand 'x'. This is, as usual, obtained from
2190
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2191
        that defines 'x' (S1). This way we find the stmt VS1_0, and the
2192
        relevant vector def 'vx0'. Having found 'vx0' we can generate
2193
        the vector stmt VS2_0, and as usual, record it in the
2194
        STMT_VINFO_VEC_STMT of stmt S2.
2195
        When creating the second copy (VS2_1), we obtain the relevant vector
2196
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2197
        stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2198
        vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2199
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2200
        Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2201
        chain of stmts and pointers:
2202
                                RELATED_STMT    VEC_STMT
2203
        VS1_0:  vx0 = memref0   VS1_1           -
2204
        VS1_1:  vx1 = memref1   VS1_2           -
2205
        VS1_2:  vx2 = memref2   VS1_3           -
2206
        VS1_3:  vx3 = memref3   -               -
2207
        S1:     x = load        -               VS1_0
2208
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
2209
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
2210
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
2211
        VS2_3:  vz3 = vx3 + v1  -               -
2212
        S2:     z = x + 1       -               VS2_0  */
2213
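
  /* Editor's addendum (sketch, using the accessors above): a consumer
     can walk the chain of copies roughly as

         for (vs = STMT_VINFO_VEC_STMT (vinfo_for_stmt (S2));
              vs;
              vs = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (vs)))
           ...use the vector def produced by this copy...

     which is how the correct def is located for each successive copy.  */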
 
2214
  prev_stmt_info = NULL;
2215
  for (j = 0; j < ncopies; j++)
2216
    {
2217
      /* Handle uses.  */
2218
      if (j == 0)
2219
        {
2220
          if (op_type == binary_op && scalar_shift_arg)
2221
            {
2222
              /* Vector shl and shr insn patterns can be defined with scalar
2223
                 operand 2 (shift operand). In this case, use constant or loop
2224
                 invariant op1 directly, without extending it to vector mode
2225
                 first.  */
2226
              optab_op2_mode = insn_data[icode].operand[2].mode;
2227
              if (!VECTOR_MODE_P (optab_op2_mode))
2228
                {
2229
                  if (vect_print_dump_info (REPORT_DETAILS))
2230
                    fprintf (vect_dump, "operand 1 using scalar mode.");
2231
                  vec_oprnd1 = op1;
2232
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2233
                  if (slp_node)
2234
                    {
2235
                      /* Store vec_oprnd1 for every vector stmt to be created
2236
                         for SLP_NODE. We check during the analysis that all the
2237
                         shift arguments are the same.
2238
                         TODO: Allow different constants for different vector
2239
                         stmts generated for an SLP instance.  */
2240
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2241
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2242
                    }
2243
                }
2244
            }
2245
 
2246
          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2247
             (a special case for certain kind of vector shifts); otherwise,
2248
             operand 1 should be of a vector type (the usual case).  */
2249
          if (op_type == binary_op && !vec_oprnd1)
2250
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2251
                               slp_node);
2252
          else
2253
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2254
                               slp_node);
2255
        }
2256
      else
2257
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2258
 
2259
      /* Arguments are ready. Create the new vector stmt.  */
2260
      for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2261
        {
2262
          vop1 = ((op_type == binary_op)
2263
                  ? VEC_index (tree, vec_oprnds1, i) : NULL);
2264
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2265
          new_temp = make_ssa_name (vec_dest, new_stmt);
2266
          gimple_assign_set_lhs (new_stmt, new_temp);
2267
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
2268
          if (slp_node)
2269
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2270
        }
2271
 
2272
      if (slp_node)
2273
        continue;
2274
 
2275
      if (j == 0)
2276
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2277
      else
2278
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2279
      prev_stmt_info = vinfo_for_stmt (new_stmt);
2280
    }
2281
 
2282
  VEC_free (tree, heap, vec_oprnds0);
2283
  if (vec_oprnds1)
2284
    VEC_free (tree, heap, vec_oprnds1);
2285
 
2286
  return true;
2287
}
2288
 
2289
 
2290
/* Get vectorized definitions for loop-based vectorization. For the first
2291
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
2292
   scalar operand), and for the rest we get a copy with
2293
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
2294
   (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2295
   The vectors are collected into VEC_OPRNDS.  */
2296
 
2297
static void
2298
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2299
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2300
{
2301
  tree vec_oprnd;
2302
 
2303
  /* Get first vector operand.  */
2304
  /* All the vector operands except the very first one (that is, the scalar
2305
     operand) are obtained as stmt copies.  */
2306
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2307
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2308
  else
2309
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2310
 
2311
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2312
 
2313
  /* Get second vector operand.  */
2314
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2315
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2316
 
2317
  *oprnd = vec_oprnd;
2318
 
2319
  /* For conversion in multiple steps, continue to get operands
2320
     recursively.  */
2321
  if (multi_step_cvt)
2322
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds,  multi_step_cvt - 1);
2323
}
2324
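
/* Editor's example (sketch): for a demotion that needs two steps
   (MULTI_STEP_CVT == 1), vectorizable_type_demotion below passes
   vect_pow2 (1) - 1 == 1, so this function recurses once and collects
   2 + 2 = 4 vector defs, matching its VEC_alloc of
   vect_pow2 (multi_step_cvt) * 2 elements.  */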
 
2325
 
2326
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2327
   For multi-step conversions store the resulting vectors and call the function
2328
   recursively.  */
2329
 
2330
static void
2331
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2332
                                       int multi_step_cvt, gimple stmt,
2333
                                       VEC (tree, heap) *vec_dsts,
2334
                                       gimple_stmt_iterator *gsi,
2335
                                       slp_tree slp_node, enum tree_code code,
2336
                                       stmt_vec_info *prev_stmt_info)
2337
{
2338
  unsigned int i;
2339
  tree vop0, vop1, new_tmp, vec_dest;
2340
  gimple new_stmt;
2341
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2342
 
2343
  vec_dest = VEC_pop (tree, vec_dsts);
2344
 
2345
  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2346
    {
2347
      /* Create demotion operation.  */
2348
      vop0 = VEC_index (tree, *vec_oprnds, i);
2349
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2350
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2351
      new_tmp = make_ssa_name (vec_dest, new_stmt);
2352
      gimple_assign_set_lhs (new_stmt, new_tmp);
2353
      vect_finish_stmt_generation (stmt, new_stmt, gsi);
2354
 
2355
      if (multi_step_cvt)
2356
        /* Store the resulting vector for next recursive call.  */
2357
        VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2358
      else
2359
        {
2360
          /* This is the last step of the conversion sequence. Store the
2361
             vectors in SLP_NODE or in vector info of the scalar statement
2362
             (or in STMT_VINFO_RELATED_STMT chain).  */
2363
          if (slp_node)
2364
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2365
          else
2366
            {
2367
              if (!*prev_stmt_info)
2368
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2369
              else
2370
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2371
 
2372
              *prev_stmt_info = vinfo_for_stmt (new_stmt);
2373
            }
2374
        }
2375
    }
2376
 
2377
  /* For multi-step demotion operations we first generate demotion operations
2378
     from the source type to the intermediate types, and then combine the
2379
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
2380
     type.  */
2381
  if (multi_step_cvt)
2382
    {
2383
      /* At each level of recursion we have half of the operands we had at the
2384
         previous level.  */
2385
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2386
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2387
                                             stmt, vec_dsts, gsi, slp_node,
2388
                                             code, prev_stmt_info);
2389
    }
2390
}
2391
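
/* Editor's sketch of a two-step demotion, not part of the original
   source: converting int to char with 128-bit vectors proceeds as

       step 1:  4 x V4SI  ->  2 x V8HI     (operands packed in pairs)
       step 2:  2 x V8HI  ->  1 x V16QI

   each recursive call pops the next vec_dest and halves the number of
   operand vectors, as arranged by the VEC_truncate above.  */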
 
2392
 
2393
/* Function vectorizable_type_demotion
2394
 
2395
   Check if STMT performs a binary or unary operation that involves
2396
   type demotion, and if it can be vectorized.
2397
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2398
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2399
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2400
 
2401
static bool
2402
vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2403
                            gimple *vec_stmt, slp_tree slp_node)
2404
{
2405
  tree vec_dest;
2406
  tree scalar_dest;
2407
  tree op0;
2408
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2409
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2410
  enum tree_code code, code1 = ERROR_MARK;
2411
  tree def;
2412
  gimple def_stmt;
2413
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2414
  stmt_vec_info prev_stmt_info;
2415
  int nunits_in;
2416
  int nunits_out;
2417
  tree vectype_out;
2418
  int ncopies;
2419
  int j, i;
2420
  tree vectype_in;
2421
  int multi_step_cvt = 0;
2422
  VEC (tree, heap) *vec_oprnds0 = NULL;
2423
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2424
  tree last_oprnd, intermediate_type;
2425
 
2426
  /* FORNOW: not supported by basic block SLP vectorization.  */
2427
  gcc_assert (loop_vinfo);
2428
 
2429
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
2430
    return false;
2431
 
2432
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2433
    return false;
2434
 
2435
  /* Is STMT a vectorizable type-demotion operation?  */
2436
  if (!is_gimple_assign (stmt))
2437
    return false;
2438
 
2439
  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2440
    return false;
2441
 
2442
  code = gimple_assign_rhs_code (stmt);
2443
  if (!CONVERT_EXPR_CODE_P (code))
2444
    return false;
2445
 
2446
  op0 = gimple_assign_rhs1 (stmt);
2447
  vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2448
  if (!vectype_in)
2449
    return false;
2450
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2451
 
2452
  scalar_dest = gimple_assign_lhs (stmt);
2453
  vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2454
  if (!vectype_out)
2455
    return false;
2456
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2457
  if (nunits_in >= nunits_out)
2458
    return false;
2459
 
2460
  /* Multiple types in SLP are handled by creating the appropriate number of
2461
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2462
     case of SLP.  */
2463
  if (slp_node)
2464
    ncopies = 1;
2465
  else
2466
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2467
  gcc_assert (ncopies >= 1);
2468
 
2469
  if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2470
          && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2471
         || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2472
             && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2473
             && CONVERT_EXPR_CODE_P (code))))
2474
    return false;
2475
 
2476
  /* Check the operands of the operation.  */
2477
  if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
2478
    {
2479
      if (vect_print_dump_info (REPORT_DETAILS))
2480
        fprintf (vect_dump, "use not simple.");
2481
      return false;
2482
    }
2483
 
2484
  /* Supportable by target?  */
2485
  if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
2486
                                        &multi_step_cvt, &interm_types))
2487
    return false;
2488
 
2489
  STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2490
 
2491
  if (!vec_stmt) /* transformation not required.  */
2492
    {
2493
      STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2494
      if (vect_print_dump_info (REPORT_DETAILS))
2495
        fprintf (vect_dump, "=== vectorizable_demotion ===");
2496
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2497
      return true;
2498
    }
2499
 
2500
  /** Transform.  **/
2501
  if (vect_print_dump_info (REPORT_DETAILS))
2502
    fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2503
             ncopies);
2504
 
2505
  /* In case of multi-step demotion, we first generate demotion operations to
2506
     the intermediate types, and then from those types to the final one.
2507
     We create vector destinations for the intermediate types (TYPES) received
2508
     from supportable_narrowing_operation, and store them in the correct order
2509
     for future use in vect_create_vectorized_demotion_stmts().  */
2510
  if (multi_step_cvt)
2511
    vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2512
  else
2513
    vec_dsts = VEC_alloc (tree, heap, 1);
2514
 
2515
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2516
  VEC_quick_push (tree, vec_dsts, vec_dest);
2517
 
2518
  if (multi_step_cvt)
2519
    {
2520
      for (i = VEC_length (tree, interm_types) - 1;
2521
           VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2522
        {
2523
          vec_dest = vect_create_destination_var (scalar_dest,
2524
                                                  intermediate_type);
2525
          VEC_quick_push (tree, vec_dsts, vec_dest);
2526
        }
2527
    }
2528
 
2529
  /* In case the vectorization factor (VF) is bigger than the number
2530
     of elements that we can fit in a vectype (nunits), we have to generate
2531
     more than one vector stmt - i.e., we need to "unroll" the
2532
     vector stmt by a factor VF/nunits.   */
2533
  last_oprnd = op0;
2534
  prev_stmt_info = NULL;
2535
  for (j = 0; j < ncopies; j++)
2536
    {
2537
      /* Handle uses.  */
2538
      if (slp_node)
2539
        vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
2540
      else
2541
        {
2542
          VEC_free (tree, heap, vec_oprnds0);
2543
          vec_oprnds0 = VEC_alloc (tree, heap,
2544
                        (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2545
          vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2546
                                    vect_pow2 (multi_step_cvt) - 1);
2547
        }
2548
 
2549
      /* Arguments are ready. Create the new vector stmts.  */
2550
      tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2551
      vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2552
                                             multi_step_cvt, stmt, tmp_vec_dsts,
2553
                                             gsi, slp_node, code1,
2554
                                             &prev_stmt_info);
2555
    }
2556
 
2557
  VEC_free (tree, heap, vec_oprnds0);
2558
  VEC_free (tree, heap, vec_dsts);
2559
  VEC_free (tree, heap, tmp_vec_dsts);
2560
  VEC_free (tree, heap, interm_types);
2561
 
2562
  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2563
  return true;
2564
}
2565
 
2566
 
2567
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2568
   and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2569
   the resulting vectors and call the function recursively.  */
2570
 
2571
static void
2572
vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2573
                                        VEC (tree, heap) **vec_oprnds1,
2574
                                        int multi_step_cvt, gimple stmt,
2575
                                        VEC (tree, heap) *vec_dsts,
2576
                                        gimple_stmt_iterator *gsi,
2577
                                        slp_tree slp_node, enum tree_code code1,
2578
                                        enum tree_code code2, tree decl1,
2579
                                        tree decl2, int op_type,
2580
                                        stmt_vec_info *prev_stmt_info)
2581
{
2582
  int i;
2583
  tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2584
  gimple new_stmt1, new_stmt2;
2585
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2586
  VEC (tree, heap) *vec_tmp;
2587
 
2588
  vec_dest = VEC_pop (tree, vec_dsts);
2589
  vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2590
 
2591
  for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2592
    {
2593
      if (op_type == binary_op)
2594
        vop1 = VEC_index (tree, *vec_oprnds1, i);
2595
      else
2596
        vop1 = NULL_TREE;
2597
 
2598
      /* Generate the two halves of the promotion operation.  */
2599
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2600
                                                 op_type, vec_dest, gsi, stmt);
2601
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2602
                                                 op_type, vec_dest, gsi, stmt);
2603
      if (is_gimple_call (new_stmt1))
2604
        {
2605
          new_tmp1 = gimple_call_lhs (new_stmt1);
2606
          new_tmp2 = gimple_call_lhs (new_stmt2);
2607
        }
2608
      else
2609
        {
2610
          new_tmp1 = gimple_assign_lhs (new_stmt1);
2611
          new_tmp2 = gimple_assign_lhs (new_stmt2);
2612
        }
2613
 
2614
      if (multi_step_cvt)
2615
        {
2616
          /* Store the results for the recursive call.  */
2617
          VEC_quick_push (tree, vec_tmp, new_tmp1);
2618
          VEC_quick_push (tree, vec_tmp, new_tmp2);
2619
        }
2620
      else
2621
        {
2622
          /* Last step of the promotion sequence - store the results.  */
2623
          if (slp_node)
2624
            {
2625
              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2626
              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
2627
            }
2628
          else
2629
            {
2630
              if (!*prev_stmt_info)
2631
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2632
              else
2633
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2634
 
2635
              *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2636
              STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2637
              *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2638
            }
2639
        }
2640
    }
2641
 
2642
  if (multi_step_cvt)
2643
    {
2644
      /* For a multi-step promotion operation we call the function
2645
         recursively for every stage.  We start from the input type,
2646
         create promotion operations to the intermediate types, and then
2647
         create promotions to the output type.  */
2648
      *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2649
      VEC_free (tree, heap, vec_tmp);
2650
      vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2651
                                              multi_step_cvt - 1, stmt,
2652
                                              vec_dsts, gsi, slp_node, code1,
2653
                                              code2, decl1, decl2, op_type,
2654
                                              prev_stmt_info);
2655
    }
2656
}
2657
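
/* Editor's sketch, not part of the original source: the mirror image of
   the demotion example above.  Promoting char to int with 128-bit
   vectors proceeds as

       step 1:  1 x V16QI ->  2 x V8HI     (lo/hi halves unpacked)
       step 2:  2 x V8HI  ->  4 x V4SI

   each stage doubles the number of result vectors via the two
   vect_gen_widened_results_half calls per operand.  */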
 
2658
 
2659
/* Function vectorizable_type_promotion
2660
 
2661
   Check if STMT performs a binary or unary operation that involves
2662
   type promotion, and if it can be vectorized.
2663
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2664
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2665
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2666
 
2667
static bool
2668
vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2669
                             gimple *vec_stmt, slp_tree slp_node)
2670
{
2671
  tree vec_dest;
2672
  tree scalar_dest;
2673
  tree op0, op1 = NULL;
2674
  tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2675
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2676
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2677
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2678
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2679
  int op_type;
2680
  tree def;
2681
  gimple def_stmt;
2682
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2683
  stmt_vec_info prev_stmt_info;
2684
  int nunits_in;
2685
  int nunits_out;
2686
  tree vectype_out;
2687
  int ncopies;
2688
  int j, i;
2689
  tree vectype_in;
2690
  tree intermediate_type = NULL_TREE;
2691
  int multi_step_cvt = 0;
2692
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2693
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2694
 
2695
  /* FORNOW: not supported by basic block SLP vectorization.  */
2696
  gcc_assert (loop_vinfo);
2697
 
2698
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
2699
    return false;
2700
 
2701
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2702
    return false;
2703
 
2704
  /* Is STMT a vectorizable type-promotion operation?  */
2705
  if (!is_gimple_assign (stmt))
2706
    return false;
2707
 
2708
  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2709
    return false;
2710
 
2711
  code = gimple_assign_rhs_code (stmt);
2712
  if (!CONVERT_EXPR_CODE_P (code)
2713
      && code != WIDEN_MULT_EXPR)
2714
    return false;
2715
 
2716
  op0 = gimple_assign_rhs1 (stmt);
2717
  vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2718
  if (!vectype_in)
2719
    return false;
2720
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2721
 
2722
  scalar_dest = gimple_assign_lhs (stmt);
2723
  vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2724
  if (!vectype_out)
2725
    return false;
2726
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2727
  if (nunits_in <= nunits_out)
2728
    return false;
2729
 
2730
  /* Multiple types in SLP are handled by creating the appropriate number of
2731
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2732
     case of SLP.  */
2733
  if (slp_node)
2734
    ncopies = 1;
2735
  else
2736
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2737
 
2738
  gcc_assert (ncopies >= 1);
2739
 
2740
  if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2741
          && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2742
         || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2743
             && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2744
             && CONVERT_EXPR_CODE_P (code))))
2745
    return false;
2746
 
2747
  /* Check the operands of the operation.  */
2748
  if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
2749
    {
2750
      if (vect_print_dump_info (REPORT_DETAILS))
2751
        fprintf (vect_dump, "use not simple.");
2752
      return false;
2753
    }
2754
 
2755
  op_type = TREE_CODE_LENGTH (code);
2756
  if (op_type == binary_op)
2757
    {
2758
      op1 = gimple_assign_rhs2 (stmt);
2759
      if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
2760
        {
2761
          if (vect_print_dump_info (REPORT_DETAILS))
2762
            fprintf (vect_dump, "use not simple.");
2763
          return false;
2764
        }
2765
    }
2766
 
2767
  /* Supportable by target?  */
2768
  if (!supportable_widening_operation (code, stmt, vectype_in,
2769
                                       &decl1, &decl2, &code1, &code2,
2770
                                       &multi_step_cvt, &interm_types))
2771
    return false;
2772
 
2773
  /* A binary widening operation can only be supported directly by the
2774
     architecture.  */
2775
  gcc_assert (!(multi_step_cvt && op_type == binary_op));
2776
 
2777
  STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2778
 
2779
  if (!vec_stmt) /* transformation not required.  */
2780
    {
2781
      STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2782
      if (vect_print_dump_info (REPORT_DETAILS))
2783
        fprintf (vect_dump, "=== vectorizable_promotion ===");
2784
      vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
2785
      return true;
2786
    }
2787
 
2788
  /** Transform.  **/
2789
 
2790
  if (vect_print_dump_info (REPORT_DETAILS))
2791
    fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2792
                        ncopies);
2793
 
2794
  /* Handle def.  */
2795
  /* In case of multi-step promotion, we first generate promotion operations
2796
     to the intermediate types, and then from those types to the final one.
2797
     We store the vector destinations in VEC_DSTS in the correct order for
2798
     recursive creation of promotion operations in
2799
     vect_create_vectorized_promotion_stmts().  Vector destinations are created
2800
     according to TYPES received from supportable_widening_operation().  */
2801
  if (multi_step_cvt)
2802
    vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2803
  else
2804
    vec_dsts = VEC_alloc (tree, heap, 1);
2805
 
2806
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2807
  VEC_quick_push (tree, vec_dsts, vec_dest);
2808
 
2809
  if (multi_step_cvt)
2810
    {
2811
      for (i = VEC_length (tree, interm_types) - 1;
2812
           VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2813
        {
2814
          vec_dest = vect_create_destination_var (scalar_dest,
2815
                                                  intermediate_type);
2816
          VEC_quick_push (tree, vec_dsts, vec_dest);
2817
        }
2818
    }
2819
 
2820
  if (!slp_node)
2821
    {
2822
      vec_oprnds0 = VEC_alloc (tree, heap,
2823
                            (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2824
      if (op_type == binary_op)
2825
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
2826
    }
2827
 
2828
  /* In case the vectorization factor (VF) is bigger than the number
2829
     of elements that we can fit in a vectype (nunits), we have to generate
2830
     more than one vector stmt - i.e., we need to "unroll" the
2831
     vector stmt by a factor VF/nunits.   */
2832
 
2833
  prev_stmt_info = NULL;
2834
  for (j = 0; j < ncopies; j++)
2835
    {
2836
      /* Handle uses.  */
2837
      if (j == 0)
2838
        {
2839
          if (slp_node)
2840
              vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
2841
          else
2842
            {
2843
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2844
              VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2845
              if (op_type == binary_op)
2846
                {
2847
                  vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2848
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2849
                }
2850
            }
2851
        }
2852
      else
2853
        {
2854
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2855
          VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
2856
          if (op_type == binary_op)
2857
            {
2858
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2859
              VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2860
            }
2861
        }
2862
 
2863
      /* Arguments are ready. Create the new vector stmts.  */
2864
      tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2865
      vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2866
                                              multi_step_cvt, stmt,
2867
                                              tmp_vec_dsts,
2868
                                              gsi, slp_node, code1, code2,
2869
                                              decl1, decl2, op_type,
2870
                                              &prev_stmt_info);
2871
    }
2872
 
2873
  VEC_free (tree, heap, vec_dsts);
2874
  VEC_free (tree, heap, tmp_vec_dsts);
2875
  VEC_free (tree, heap, interm_types);
2876
  VEC_free (tree, heap, vec_oprnds0);
2877
  VEC_free (tree, heap, vec_oprnds1);
2878
 
2879
  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2880
  return true;
2881
}
2882
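
/* Editor's example (sketch): a widening multiply such as

       int_res = (int) short_a * (int) short_b;

   reaches the function above as WIDEN_MULT_EXPR (op_type == binary_op)
   and is vectorized with a lo/hi pair of widening multiplies when the
   target provides them; the gcc_assert above guarantees the binary case
   is never multi-step.  */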
 
2883
 
2884
/* Function vectorizable_store.
2885
 
2886
   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
2887
   can be vectorized.
2888
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2889
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2890
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2891
 
2892
static bool
2893
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2894
                    slp_tree slp_node)
2895
{
2896
  tree scalar_dest;
2897
  tree data_ref;
2898
  tree op;
2899
  tree vec_oprnd = NULL_TREE;
2900
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2901
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2902
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2903
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2904
  struct loop *loop = NULL;
2905
  enum machine_mode vec_mode;
2906
  tree dummy;
2907
  enum dr_alignment_support alignment_support_scheme;
2908
  tree def;
2909
  gimple def_stmt;
2910
  enum vect_def_type dt;
2911
  stmt_vec_info prev_stmt_info = NULL;
2912
  tree dataref_ptr = NULL_TREE;
2913
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2914
  int ncopies;
2915
  int j;
2916
  gimple next_stmt, first_stmt = NULL;
2917
  bool strided_store = false;
2918
  unsigned int group_size, i;
2919
  VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2920
  bool inv_p;
2921
  VEC(tree,heap) *vec_oprnds = NULL;
2922
  bool slp = (slp_node != NULL);
2923
  unsigned int vec_num;
2924
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2925
 
2926
  if (loop_vinfo)
2927
    loop = LOOP_VINFO_LOOP (loop_vinfo);
2928
 
2929
  /* Multiple types in SLP are handled by creating the appropriate number of
2930
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2931
     case of SLP.  */
2932
  if (slp)
2933
    ncopies = 1;
2934
  else
2935
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2936
 
2937
  gcc_assert (ncopies >= 1);
2938
 
2939
  /* FORNOW. This restriction should be relaxed.  */
2940
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
2941
    {
2942
      if (vect_print_dump_info (REPORT_DETAILS))
2943
        fprintf (vect_dump, "multiple types in nested loop.");
2944
      return false;
2945
    }
2946
 
2947
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2948
    return false;
2949
 
2950
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2951
    return false;
2952
 
2953
  /* Is vectorizable store? */
2954
 
2955
  if (!is_gimple_assign (stmt))
2956
    return false;
2957
 
2958
  scalar_dest = gimple_assign_lhs (stmt);
2959
  if (TREE_CODE (scalar_dest) != ARRAY_REF
2960
      && TREE_CODE (scalar_dest) != INDIRECT_REF
2961
      && TREE_CODE (scalar_dest) != COMPONENT_REF
2962
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
2963
      && TREE_CODE (scalar_dest) != REALPART_EXPR)
2964
    return false;
2965
 
2966
  gcc_assert (gimple_assign_single_p (stmt));
2967
  op = gimple_assign_rhs1 (stmt);
2968
  if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
2969
    {
2970
      if (vect_print_dump_info (REPORT_DETAILS))
2971
        fprintf (vect_dump, "use not simple.");
2972
      return false;
2973
    }
2974
 
2975
  /* The scalar rhs type needs to be trivially convertible to the vector
2976
     component type.  This should always be the case.  */
2977
  if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
2978
    {
2979
      if (vect_print_dump_info (REPORT_DETAILS))
2980
        fprintf (vect_dump, "???  operands of different types");
2981
      return false;
2982
    }
2983
 
2984
  vec_mode = TYPE_MODE (vectype);
2985
  /* FORNOW. In some cases can vectorize even if data-type not supported
2986
     (e.g. - array initialization with 0).  */
2987
  if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
2988
    return false;
2989
 
2990
  if (!STMT_VINFO_DATA_REF (stmt_info))
2991
    return false;
2992
 
2993
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
2994
    {
2995
      strided_store = true;
2996
      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2997
      if (!vect_strided_store_supported (vectype)
2998
          && !PURE_SLP_STMT (stmt_info) && !slp)
2999
        return false;
3000
 
3001
      if (first_stmt == stmt)
3002
        {
3003
          /* STMT is the leader of the group. Check the operands of all the
3004
             stmts of the group.  */
3005
          next_stmt = DR_GROUP_NEXT_DR (stmt_info);
3006
          while (next_stmt)
3007
            {
3008
              gcc_assert (gimple_assign_single_p (next_stmt));
3009
              op = gimple_assign_rhs1 (next_stmt);
3010
              if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3011
                                       &def, &dt))
3012
                {
3013
                  if (vect_print_dump_info (REPORT_DETAILS))
3014
                    fprintf (vect_dump, "use not simple.");
3015
                  return false;
3016
                }
3017
              next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3018
            }
3019
        }
3020
    }
3021
 
3022
  if (!vec_stmt) /* transformation not required.  */
3023
    {
3024
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3025
      vect_model_store_cost (stmt_info, ncopies, dt, NULL);
3026
      return true;
3027
    }
3028
 
3029
  /** Transform.  **/
3030
 
3031
  if (strided_store)
3032
    {
3033
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3034
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3035
 
3036
      DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3037
 
3038
      /* FORNOW */
3039
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3040
 
3041
      /* We vectorize all the stmts of the interleaving group when we
3042
         reach the last stmt in the group.  */
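 
      /* E.g., for a group of four stores, the first three calls into this
         function only bump DR_GROUP_STORE_COUNT above and return with
         *VEC_STMT == NULL; the call for the last store in the group falls
         through and emits the vector stores for the whole chain.  */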
3043
      if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3044
          < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
3045
          && !slp)
3046
        {
3047
          *vec_stmt = NULL;
3048
          return true;
3049
        }
3050
 
3051
      if (slp)
3052
        strided_store = false;
3053
 
3054
      /* VEC_NUM is the number of vect stmts to be created for this group.  */
3055
      if (slp)
3056
        vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3057
      else
3058
        vec_num = group_size;
3059
    }
3060
  else
3061
    {
3062
      first_stmt = stmt;
3063
      first_dr = dr;
3064
      group_size = vec_num = 1;
3065
    }
3066
 
3067
  if (vect_print_dump_info (REPORT_DETAILS))
3068
    fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3069
 
3070
  dr_chain = VEC_alloc (tree, heap, group_size);
3071
  oprnds = VEC_alloc (tree, heap, group_size);
3072
 
3073
  alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3074
  gcc_assert (alignment_support_scheme);
3075
 
3076
  /* In case the vectorization factor (VF) is bigger than the number
3077
     of elements that we can fit in a vectype (nunits), we have to generate
3078
     more than one vector stmt - i.e., we need to "unroll" the
3079
     vector stmt by a factor VF/nunits.  For more details see documentation in
3080
     vect_get_vec_def_for_copy_stmt.  */
3081
 
3082
  /* In case of interleaving (non-unit strided access):
3083
 
3084
        S1:  &base + 2 = x2
3085
        S2:  &base = x0
3086
        S3:  &base + 1 = x1
3087
        S4:  &base + 3 = x3
3088
 
3089
     We create vectorized stores starting from the base address (the access
3090
     of the first stmt in the chain, S2 in the above example) when the last
3091
     store stmt of the chain (S4) is reached:
3092
 
3093
        VS1: &base = vx2
3094
        VS2: &base + vec_size*1 = vx0
3095
        VS3: &base + vec_size*2 = vx1
3096
        VS4: &base + vec_size*3 = vx3
3097
 
3098
     Then permutation statements are generated:
3099
 
3100
        VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3101
        VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3102
        ...
3103
 
3104
     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3105
     (the order of the data-refs in the output of vect_permute_store_chain
3106
     corresponds to the order of scalar stmts in the interleaving chain - see
3107
     the documentation of vect_permute_store_chain()).
3108
 
3109
     In case of both multiple types and interleaving, above vector stores and
3110
     permutation stmts are created for every copy. The result vector stmts are
3111
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3112
     STMT_VINFO_RELATED_STMT for the next copies.
3113
  */
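 
  /* As a concrete illustration of the interleaving above, with nunits == 4,
     if vx0 = {a0,a1,a2,a3} and vx3 = {b0,b1,b2,b3}, then
 
        VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 > = {a0,b0,a1,b1}
        VEC_INTERLEAVE_LOW_EXPR  < vx0, vx3 > = {a2,b2,a3,b3}
 
     (up to endianness; vect_permute_store_chain documents the exact
     element order).  */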
3114
 
3115
  prev_stmt_info = NULL;
3116
  for (j = 0; j < ncopies; j++)
3117
    {
3118
      gimple new_stmt;
3119
      gimple ptr_incr;
3120
 
3121
      if (j == 0)
3122
        {
3123
          if (slp)
3124
            {
3125
              /* Get vectorized arguments for SLP_NODE.  */
3126
              vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
3127
 
3128
              vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3129
            }
3130
          else
3131
            {
3132
              /* For interleaved stores we collect vectorized defs for all the
3133
                 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3134
                 used as an input to vect_permute_store_chain(), and OPRNDS as
3135
                 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3136
 
3137
                 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3138
                 OPRNDS are of size 1.  */
3139
              next_stmt = first_stmt;
3140
              for (i = 0; i < group_size; i++)
3141
                {
3142
                  /* Since gaps are not supported for interleaved stores,
3143
                     GROUP_SIZE is the exact number of stmts in the chain.
3144
                     Therefore, NEXT_STMT can't be NULL.  In case that
3145
                     there is no interleaving, GROUP_SIZE is 1, and only one
3146
                     iteration of the loop will be executed.  */
3147
                  gcc_assert (next_stmt
3148
                              && gimple_assign_single_p (next_stmt));
3149
                  op = gimple_assign_rhs1 (next_stmt);
3150
 
3151
                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3152
                                                            NULL);
3153
                  VEC_quick_push(tree, dr_chain, vec_oprnd);
3154
                  VEC_quick_push(tree, oprnds, vec_oprnd);
3155
                  next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3156
                }
3157
            }
3158
 
3159
          /* We should have caught mismatched types earlier.  */
3160
          gcc_assert (useless_type_conversion_p (vectype,
3161
                                                 TREE_TYPE (vec_oprnd)));
3162
          dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3163
                                                  &dummy, &ptr_incr, false,
3164
                                                  &inv_p);
3165
          gcc_assert (bb_vinfo || !inv_p);
3166
        }
3167
      else
3168
        {
3169
          /* For interleaved stores we created vectorized defs for all the
3170
             defs stored in OPRNDS in the previous iteration (previous copy).
3171
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
3172
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3173
             next copy.
3174
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3175
             OPRNDS are of size 1.  */
3176
          for (i = 0; i < group_size; i++)
3177
            {
3178
              op = VEC_index (tree, oprnds, i);
3179
              vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3180
                                  &dt);
3181
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3182
              VEC_replace(tree, dr_chain, i, vec_oprnd);
3183
              VEC_replace(tree, oprnds, i, vec_oprnd);
3184
            }
3185
          dataref_ptr =
3186
                bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3187
        }
3188
 
3189
      if (strided_store)
3190
        {
3191
          result_chain = VEC_alloc (tree, heap, group_size);
3192
          /* Permute.  */
3193
          if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3194
                                         &result_chain))
3195
            return false;
3196
        }
3197
 
3198
      next_stmt = first_stmt;
3199
      for (i = 0; i < vec_num; i++)
3200
        {
3201
          if (i > 0)
3202
            /* Bump the vector pointer.  */
3203
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3204
                                           NULL_TREE);
3205
 
3206
          if (slp)
3207
            vec_oprnd = VEC_index (tree, vec_oprnds, i);
3208
          else if (strided_store)
3209
            /* For strided stores vectorized defs are interleaved in
3210
               vect_permute_store_chain().  */
3211
            vec_oprnd = VEC_index (tree, result_chain, i);
3212
 
3213
          if (aligned_access_p (first_dr))
3214
            data_ref = build_fold_indirect_ref (dataref_ptr);
3215
          else
3216
          {
3217
            int mis = DR_MISALIGNMENT (first_dr);
3218
            tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3219
            tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3220
            data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3221
           }
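 
          /* E.g., a known misalignment of 4 bytes yields
             TMIS == 4 * BITS_PER_UNIT == 32 bits, while an unknown
             misalignment (DR_MISALIGNMENT == -1) is encoded as offset 0,
             the MISALIGNED_INDIRECT_REF itself marking the access as
             potentially unaligned.  */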
3222
 
3223
          /* If accesses through a pointer to vectype do not alias the original
3224
             memory reference we have a problem.  This should never happen.  */
3225
          gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3226
                      get_alias_set (gimple_assign_lhs (stmt))));
3227
 
3228
          /* Arguments are ready. Create the new vector stmt.  */
3229
          new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3230
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
3231
          mark_symbols_for_renaming (new_stmt);
3232
 
3233
          if (slp)
3234
            continue;
3235
 
3236
          if (j == 0)
3237
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt =  new_stmt;
3238
          else
3239
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3240
 
3241
          prev_stmt_info = vinfo_for_stmt (new_stmt);
3242
          next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3243
          if (!next_stmt)
3244
            break;
3245
        }
3246
    }
3247
 
3248
  VEC_free (tree, heap, dr_chain);
3249
  VEC_free (tree, heap, oprnds);
3250
  if (result_chain)
3251
    VEC_free (tree, heap, result_chain);
3252
 
3253
  return true;
3254
}
3255
 
3256
/* vectorizable_load.
3257
 
3258
   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
3259
   can be vectorized.
3260
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3261
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3262
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3263
 
3264
static bool
3265
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3266
                   slp_tree slp_node, slp_instance slp_node_instance)
3267
{
3268
  tree scalar_dest;
3269
  tree vec_dest = NULL;
3270
  tree data_ref = NULL;
3271
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3272
  stmt_vec_info prev_stmt_info;
3273
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3274
  struct loop *loop = NULL;
3275
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3276
  bool nested_in_vect_loop = false;
3277
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3278
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3279
  tree new_temp;
3280
  int mode;
3281
  gimple new_stmt = NULL;
3282
  tree dummy;
3283
  enum dr_alignment_support alignment_support_scheme;
3284
  tree dataref_ptr = NULL_TREE;
3285
  gimple ptr_incr;
3286
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3287
  int ncopies;
3288
  int i, j, group_size;
3289
  tree msq = NULL_TREE, lsq;
3290
  tree offset = NULL_TREE;
3291
  tree realignment_token = NULL_TREE;
3292
  gimple phi = NULL;
3293
  VEC(tree,heap) *dr_chain = NULL;
3294
  bool strided_load = false;
3295
  gimple first_stmt;
3296
  tree scalar_type;
3297
  bool inv_p;
3298
  bool compute_in_loop = false;
3299
  struct loop *at_loop;
3300
  int vec_num;
3301
  bool slp = (slp_node != NULL);
3302
  bool slp_perm = false;
3303
  enum tree_code code;
3304
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3305
  int vf;
3306
 
3307
  if (loop_vinfo)
3308
    {
3309
      loop = LOOP_VINFO_LOOP (loop_vinfo);
3310
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3311
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3312
    }
3313
  else
3314
    vf = 1;
3315
 
3316
  /* Multiple types in SLP are handled by creating the appropriate number of
3317
     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3318
     case of SLP.  */
3319
  if (slp)
3320
    ncopies = 1;
3321
  else
3322
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3323
 
3324
  gcc_assert (ncopies >= 1);
3325
 
3326
  /* FORNOW. This restriction should be relaxed.  */
3327
  if (nested_in_vect_loop && ncopies > 1)
3328
    {
3329
      if (vect_print_dump_info (REPORT_DETAILS))
3330
        fprintf (vect_dump, "multiple types in nested loop.");
3331
      return false;
3332
    }
3333
 
3334
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3335
    return false;
3336
 
3337
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3338
    return false;
3339
 
3340
  /* Is vectorizable load? */
3341
  if (!is_gimple_assign (stmt))
3342
    return false;
3343
 
3344
  scalar_dest = gimple_assign_lhs (stmt);
3345
  if (TREE_CODE (scalar_dest) != SSA_NAME)
3346
    return false;
3347
 
3348
  code = gimple_assign_rhs_code (stmt);
3349
  if (code != ARRAY_REF
3350
      && code != INDIRECT_REF
3351
      && code != COMPONENT_REF
3352
      && code != IMAGPART_EXPR
3353
      && code != REALPART_EXPR)
3354
    return false;
3355
 
3356
  if (!STMT_VINFO_DATA_REF (stmt_info))
3357
    return false;
3358
 
3359
  scalar_type = TREE_TYPE (DR_REF (dr));
3360
  mode = (int) TYPE_MODE (vectype);
3361
 
3362
  /* FORNOW. In some cases can vectorize even if data-type not supported
3363
    (e.g. - data copies).  */
3364
  if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3365
    {
3366
      if (vect_print_dump_info (REPORT_DETAILS))
3367
        fprintf (vect_dump, "Aligned load, but unsupported type.");
3368
      return false;
3369
    }
3370
 
3371
  /* The vector component type needs to be trivially convertible to the
3372
     scalar lhs.  This should always be the case.  */
3373
  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3374
    {
3375
      if (vect_print_dump_info (REPORT_DETAILS))
3376
        fprintf (vect_dump, "???  operands of different types");
3377
      return false;
3378
    }
3379
 
3380
  /* Check if the load is a part of an interleaving chain.  */
3381
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3382
    {
3383
      strided_load = true;
3384
      /* FORNOW */
3385
      gcc_assert (! nested_in_vect_loop);
3386
 
3387
      /* Check if interleaving is supported.  */
3388
      if (!vect_strided_load_supported (vectype)
3389
          && !PURE_SLP_STMT (stmt_info) && !slp)
3390
        return false;
3391
    }
3392
 
3393
  if (!vec_stmt) /* transformation not required.  */
3394
    {
3395
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3396
      vect_model_load_cost (stmt_info, ncopies, NULL);
3397
      return true;
3398
    }
3399
 
3400
  if (vect_print_dump_info (REPORT_DETAILS))
3401
    fprintf (vect_dump, "transform load.");
3402
 
3403
  /** Transform.  **/
3404
 
3405
  if (strided_load)
3406
    {
3407
      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3408
      /* Check if the chain of loads is already vectorized.  */
3409
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3410
        {
3411
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3412
          return true;
3413
        }
3414
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3415
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3416
 
3417
      /* VEC_NUM is the number of vect stmts to be created for this group.  */
3418
      if (slp)
3419
        {
3420
          strided_load = false;
3421
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3422
          if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3423
            slp_perm = true;
3424
        }
3425
      else
3426
        vec_num = group_size;
3427
 
3428
      dr_chain = VEC_alloc (tree, heap, vec_num);
3429
    }
3430
  else
3431
    {
3432
      first_stmt = stmt;
3433
      first_dr = dr;
3434
      group_size = vec_num = 1;
3435
    }
3436
 
3437
  alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3438
  gcc_assert (alignment_support_scheme);
3439
 
3440
  /* In case the vectorization factor (VF) is bigger than the number
3441
     of elements that we can fit in a vectype (nunits), we have to generate
3442
     more than one vector stmt - i.e., we need to "unroll" the
3443
     vector stmt by a factor VF/nunits. In doing so, we record a pointer
3444
     from one copy of the vector stmt to the next, in the field
3445
     STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3446
     stages to find the correct vector defs to be used when vectorizing
3447
     stmts that use the defs of the current stmt. The example below illustrates
3448
     the vectorization process when VF=16 and nunits=4 (i.e., we need to create
3449
     4 vectorized stmts):
3450
 
3451
     before vectorization:
3452
                                RELATED_STMT    VEC_STMT
3453
        S1:     x = memref      -               -
3454
        S2:     z = x + 1       -               -
3455
 
3456
     step 1: vectorize stmt S1:
3457
        We first create the vector stmt VS1_0, and, as usual, record a
3458
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3459
        Next, we create the vector stmt VS1_1, and record a pointer to
3460
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3461
        Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3462
        stmts and pointers:
3463
                                RELATED_STMT    VEC_STMT
3464
        VS1_0:  vx0 = memref0   VS1_1           -
3465
        VS1_1:  vx1 = memref1   VS1_2           -
3466
        VS1_2:  vx2 = memref2   VS1_3           -
3467
        VS1_3:  vx3 = memref3   -               -
3468
        S1:     x = load        -               VS1_0
3469
        S2:     z = x + 1       -               -
3470
 
3471
     See in documentation in vect_get_vec_def_for_stmt_copy for how the
3472
     information we recorded in RELATED_STMT field is used to vectorize
3473
     stmt S2.  */
3474
 
3475
  /* In case of interleaving (non-unit strided access):
3476
 
3477
     S1:  x2 = &base + 2
3478
     S2:  x0 = &base
3479
     S3:  x1 = &base + 1
3480
     S4:  x3 = &base + 3
3481
 
3482
     Vectorized loads are created in the order of memory accesses
3483
     starting from the access of the first stmt of the chain:
3484
 
3485
     VS1: vx0 = &base
3486
     VS2: vx1 = &base + vec_size*1
3487
     VS3: vx2 = &base + vec_size*2
3488
     VS4: vx3 = &base + vec_size*3
3489
 
3490
     Then permutation statements are generated:
3491
 
3492
     VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3493
     VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3494
       ...
3495
 
3496
     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3497
     (the order of the data-refs in the output of vect_permute_load_chain
3498
     corresponds to the order of scalar stmts in the interleaving chain - see
3499
     the documentation of vect_permute_load_chain()).
3500
     Generating the permutation stmts and recording them in
3501
     STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3502
 
3503
     In case of both multiple types and interleaving, the vector loads and
3504
     permutation stmts above are created for every copy. The result vector stmts
3505
     are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3506
     STMT_VINFO_RELATED_STMT for the next copies.  */
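 
  /* For instance, if vx0 = {a0,a1,a2,a3} and vx1 = {a4,a5,a6,a7} hold eight
     consecutive elements, then
 
     VEC_EXTRACT_EVEN_EXPR < vx0, vx1 > = {a0,a2,a4,a6}
     VEC_EXTRACT_ODD_EXPR  < vx0, vx1 > = {a1,a3,a5,a7}
 
     which de-interleaves two scalar access streams that were loaded
     together.  */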
3507
 
3508
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
3509
     on a target that supports unaligned accesses (dr_unaligned_supported)
3510
     we generate the following code:
3511
         p = initial_addr;
3512
         indx = 0;
3513
         loop {
3514
           p = p + indx * vectype_size;
3515
           vec_dest = *(p);
3516
           indx = indx + 1;
3517
         }
3518
 
3519
     Otherwise, the data reference is potentially unaligned on a target that
3520
     does not support unaligned accesses (dr_explicit_realign_optimized) -
3521
     then generate the following code, in which the data in each iteration is
3522
     obtained by two vector loads, one from the previous iteration, and one
3523
     from the current iteration:
3524
         p1 = initial_addr;
3525
         msq_init = *(floor(p1))
3526
         p2 = initial_addr + VS - 1;
3527
         realignment_token = call target_builtin;
3528
         indx = 0;
3529
         loop {
3530
           p2 = p2 + indx * vectype_size
3531
           lsq = *(floor(p2))
3532
           vec_dest = realign_load (msq, lsq, realignment_token)
3533
           indx = indx + 1;
3534
           msq = lsq;
3535
         }   */
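 
  /* For example, with 16-byte vectors (VS == 16) and an access misaligned
     by 4 bytes: floor(p1) loads the 16 aligned bytes below the desired
     address into MSQ, floor(p2) == floor(p1 + 15) loads the next aligned
     16 bytes into LSQ, and realign_load shifts and merges the two so that
     VEC_DEST holds the 16 bytes starting at the unaligned address.  */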
3536
 
3537
  /* If the misalignment remains the same throughout the execution of the
3538
     loop, we can create the init_addr and permutation mask at the loop
3539
     preheader. Otherwise, it needs to be created inside the loop.
3540
     This can only occur when vectorizing memory accesses in the inner-loop
3541
     nested within an outer-loop that is being vectorized.  */
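 
  /* E.g., an inner-loop data-ref whose DR_STEP is 4 bytes while the vector
     size is 16 bytes changes its misalignment on every outer-loop
     iteration (4 % 16 != 0), so the realignment has to be set up inside
     the loop rather than in the preheader.  */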
3542
 
3543
  if (loop && nested_in_vect_loop_p (loop, stmt)
3544
      && (TREE_INT_CST_LOW (DR_STEP (dr))
3545
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3546
    {
3547
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3548
      compute_in_loop = true;
3549
    }
3550
 
3551
  if ((alignment_support_scheme == dr_explicit_realign_optimized
3552
       || alignment_support_scheme == dr_explicit_realign)
3553
      && !compute_in_loop)
3554
    {
3555
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3556
                                    alignment_support_scheme, NULL_TREE,
3557
                                    &at_loop);
3558
      if (alignment_support_scheme == dr_explicit_realign_optimized)
3559
        {
3560
          phi = SSA_NAME_DEF_STMT (msq);
3561
          offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3562
        }
3563
    }
3564
  else
3565
    at_loop = loop;
3566
 
3567
  prev_stmt_info = NULL;
3568
  for (j = 0; j < ncopies; j++)
3569
    {
3570
      /* 1. Create the vector pointer update chain.  */
3571
      if (j == 0)
3572
        dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3573
                                                at_loop, offset,
3574
                                                &dummy, &ptr_incr, false,
3575
                                                &inv_p);
3576
      else
3577
        dataref_ptr =
3578
                bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3579
 
3580
      for (i = 0; i < vec_num; i++)
3581
        {
3582
          if (i > 0)
3583
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3584
                                           NULL_TREE);
3585
 
3586
          /* 2. Create the vector-load in the loop.  */
3587
          switch (alignment_support_scheme)
3588
            {
3589
            case dr_aligned:
3590
              gcc_assert (aligned_access_p (first_dr));
3591
              data_ref = build_fold_indirect_ref (dataref_ptr);
3592
              break;
3593
            case dr_unaligned_supported:
3594
              {
3595
                int mis = DR_MISALIGNMENT (first_dr);
3596
                tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3597
 
3598
                tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
3599
                data_ref =
3600
                  build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3601
                break;
3602
              }
3603
            case dr_explicit_realign:
3604
              {
3605
                tree ptr, bump;
3606
                tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3607
 
3608
                if (compute_in_loop)
3609
                  msq = vect_setup_realignment (first_stmt, gsi,
3610
                                                &realignment_token,
3611
                                                dr_explicit_realign,
3612
                                                dataref_ptr, NULL);
3613
 
3614
                data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3615
                vec_dest = vect_create_destination_var (scalar_dest, vectype);
3616
                new_stmt = gimple_build_assign (vec_dest, data_ref);
3617
                new_temp = make_ssa_name (vec_dest, new_stmt);
3618
                gimple_assign_set_lhs (new_stmt, new_temp);
3619
                gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3620
                gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3621
                vect_finish_stmt_generation (stmt, new_stmt, gsi);
3622
                msq = new_temp;
3623
 
3624
                bump = size_binop (MULT_EXPR, vs_minus_1,
3625
                                   TYPE_SIZE_UNIT (scalar_type));
3626
                ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3627
                data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3628
                break;
3629
              }
3630
            case dr_explicit_realign_optimized:
3631
              data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3632
              break;
3633
            default:
3634
              gcc_unreachable ();
3635
            }
3636
          /* If accesses through a pointer to vectype do not alias the original
3637
             memory reference we have a problem.  This should never happen. */
3638
          gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3639
                      get_alias_set (gimple_assign_rhs1 (stmt))));
3640
          vec_dest = vect_create_destination_var (scalar_dest, vectype);
3641
          new_stmt = gimple_build_assign (vec_dest, data_ref);
3642
          new_temp = make_ssa_name (vec_dest, new_stmt);
3643
          gimple_assign_set_lhs (new_stmt, new_temp);
3644
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
3645
          mark_symbols_for_renaming (new_stmt);
3646
 
3647
          /* 3. Handle explicit realignment if necessary/supported. Create in
3648
                loop: vec_dest = realign_load (msq, lsq, realignment_token)  */
3649
          if (alignment_support_scheme == dr_explicit_realign_optimized
3650
              || alignment_support_scheme == dr_explicit_realign)
3651
            {
3652
              tree tmp;
3653
 
3654
              lsq = gimple_assign_lhs (new_stmt);
3655
              if (!realignment_token)
3656
                realignment_token = dataref_ptr;
3657
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
3658
              tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3659
                            realignment_token);
3660
              new_stmt = gimple_build_assign (vec_dest, tmp);
3661
              new_temp = make_ssa_name (vec_dest, new_stmt);
3662
              gimple_assign_set_lhs (new_stmt, new_temp);
3663
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
3664
 
3665
              if (alignment_support_scheme == dr_explicit_realign_optimized)
3666
                {
3667
                  gcc_assert (phi);
3668
                  if (i == vec_num - 1 && j == ncopies - 1)
3669
                    add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
3670
                                 UNKNOWN_LOCATION);
3671
                  msq = lsq;
3672
                }
3673
            }
3674
 
3675
          /* 4. Handle invariant-load.  */
3676
          if (inv_p && !bb_vinfo)
3677
            {
3678
              gcc_assert (!strided_load);
3679
              gcc_assert (nested_in_vect_loop_p (loop, stmt));
3680
              if (j == 0)
3681
                {
3682
                  int k;
3683
                  tree t = NULL_TREE;
3684
                  tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3685
 
3686
                   /* CHECKME: bitpos depends on endianness?  */
3687
                  bitpos = bitsize_zero_node;
3688
                  vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3689
                                    bitsize, bitpos);
3690
                  vec_dest =
3691
                        vect_create_destination_var (scalar_dest, NULL_TREE);
3692
                  new_stmt = gimple_build_assign (vec_dest, vec_inv);
3693
                  new_temp = make_ssa_name (vec_dest, new_stmt);
3694
                  gimple_assign_set_lhs (new_stmt, new_temp);
3695
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3696
 
3697
                  for (k = nunits - 1; k >= 0; --k)
3698
                    t = tree_cons (NULL_TREE, new_temp, t);
3699
                  /* FIXME: use build_constructor directly.  */
3700
                  vec_inv = build_constructor_from_list (vectype, t);
3701
                  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3702
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
3703
                }
3704
              else
3705
                gcc_unreachable (); /* FORNOW. */
3706
            }
3707
 
3708
          /* Collect vector loads and later create their permutation in
3709
             vect_transform_strided_load ().  */
3710
          if (strided_load || slp_perm)
3711
            VEC_quick_push (tree, dr_chain, new_temp);
3712
 
3713
         /* Store vector loads in the corresponding SLP_NODE.  */
3714
          if (slp && !slp_perm)
3715
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3716
        }
3717
 
3718
      if (slp && !slp_perm)
3719
        continue;
3720
 
3721
      if (slp_perm)
3722
        {
3723
          if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
3724
                                             slp_node_instance, false))
3725
            {
3726
              VEC_free (tree, heap, dr_chain);
3727
              return false;
3728
            }
3729
        }
3730
      else
3731
        {
3732
          if (strided_load)
3733
            {
3734
              if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3735
                return false;
3736
 
3737
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3738
              VEC_free (tree, heap, dr_chain);
3739
              dr_chain = VEC_alloc (tree, heap, group_size);
3740
            }
3741
          else
3742
            {
3743
              if (j == 0)
3744
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3745
              else
3746
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3747
              prev_stmt_info = vinfo_for_stmt (new_stmt);
3748
            }
3749
        }
3750
    }
3751
 
3752
  if (dr_chain)
3753
    VEC_free (tree, heap, dr_chain);
3754
 
3755
  return true;
3756
}
3757
 
3758
/* Function vect_is_simple_cond.
3759
 
3760
   Input:
3761
   LOOP - the loop that is being vectorized.
3762
   COND - Condition that is checked for simple use.
3763
 
3764
   Returns whether a COND can be vectorized.  Checks whether
3765
   condition operands are supportable using vect_is_simple_use.  */
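 
   /* For example, "a_1 < b_2" with both operands SSA names that have
      supportable defs is a simple cond, as is "a_1 != 0" (an INTEGER_CST
      operand); an operand of any other kind, such as a COMPLEX_CST, is
      rejected.  */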
3766
 
3767
static bool
3768
vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3769
{
3770
  tree lhs, rhs;
3771
  tree def;
3772
  enum vect_def_type dt;
3773
 
3774
  if (!COMPARISON_CLASS_P (cond))
3775
    return false;
3776
 
3777
  lhs = TREE_OPERAND (cond, 0);
3778
  rhs = TREE_OPERAND (cond, 1);
3779
 
3780
  if (TREE_CODE (lhs) == SSA_NAME)
3781
    {
3782
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3783
      if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
3784
                               &dt))
3785
        return false;
3786
    }
3787
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3788
           && TREE_CODE (lhs) != FIXED_CST)
3789
    return false;
3790
 
3791
  if (TREE_CODE (rhs) == SSA_NAME)
3792
    {
3793
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3794
      if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
3795
                               &dt))
3796
        return false;
3797
    }
3798
  else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST
3799
           && TREE_CODE (rhs) != FIXED_CST)
3800
    return false;
3801
 
3802
  return true;
3803
}
3804
 
3805
/* vectorizable_condition.
3806
 
3807
   Check if STMT is a conditional modify expression that can be vectorized.
3808
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3809
   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
3810
   at GSI.
3811
 
3812
   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
3813
   to be used at REDUC_INDEX (in the THEN clause if REDUC_INDEX is 1, and in
3814
   the ELSE clause if it is 2).
3815
 
3816
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
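 
   /* E.g., the scalar stmt "x_5 = a_1 < b_2 ? c_3 : d_4" is transformed
      into
 
        vx = VEC_COND_EXPR < va_1 < vb_2, vc_3, vd_4 >
 
      where va_1, vb_2, vc_3 and vd_4 are the vector defs of the scalar
      operands.  */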
3817
 
3818
bool
3819
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
3820
                        gimple *vec_stmt, tree reduc_def, int reduc_index)
3821
{
3822
  tree scalar_dest = NULL_TREE;
3823
  tree vec_dest = NULL_TREE;
3824
  tree op = NULL_TREE;
3825
  tree cond_expr, then_clause, else_clause;
3826
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3827
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3828
  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3829
  tree vec_compare, vec_cond_expr;
3830
  tree new_temp;
3831
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3832
  enum machine_mode vec_mode;
3833
  tree def;
3834
  enum vect_def_type dt;
3835
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3836
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3837
  enum tree_code code;
3838
 
3839
  /* FORNOW: unsupported in basic block SLP.  */
3840
  gcc_assert (loop_vinfo);
3841
 
3842
  gcc_assert (ncopies >= 1);
3843
  if (ncopies > 1)
3844
    return false; /* FORNOW */
3845
 
3846
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
3847
    return false;
3848
 
3849
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3850
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
3851
           && reduc_def))
3852
    return false;
3853
 
3854
  /* FORNOW: SLP not supported.  */
3855
  if (STMT_SLP_TYPE (stmt_info))
3856
    return false;
3857
 
3858
  /* FORNOW: not yet supported.  */
3859
  if (STMT_VINFO_LIVE_P (stmt_info))
3860
    {
3861
      if (vect_print_dump_info (REPORT_DETAILS))
3862
        fprintf (vect_dump, "value used after loop.");
3863
      return false;
3864
    }
3865
 
3866
  /* Is vectorizable conditional operation?  */
3867
  if (!is_gimple_assign (stmt))
3868
    return false;
3869
 
3870
  code = gimple_assign_rhs_code (stmt);
3871
 
3872
  if (code != COND_EXPR)
3873
    return false;
3874
 
3875
  gcc_assert (gimple_assign_single_p (stmt));
3876
  op = gimple_assign_rhs1 (stmt);
3877
  cond_expr = TREE_OPERAND (op, 0);
3878
  then_clause = TREE_OPERAND (op, 1);
3879
  else_clause = TREE_OPERAND (op, 2);
3880
 
3881
  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3882
    return false;
3883
 
3884
  /* We do not handle two different vector types for the condition
3885
     and the values.  */
3886
  if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)),
3887
                           TREE_TYPE (vectype)))
3888
    return false;
3889
 
3890
  if (TREE_CODE (then_clause) == SSA_NAME)
3891
    {
3892
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3893
      if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
3894
                               &then_def_stmt, &def, &dt))
3895
        return false;
3896
    }
3897
  else if (TREE_CODE (then_clause) != INTEGER_CST
3898
           && TREE_CODE (then_clause) != REAL_CST
3899
           && TREE_CODE (then_clause) != FIXED_CST)
3900
    return false;
3901
 
3902
  if (TREE_CODE (else_clause) == SSA_NAME)
3903
    {
3904
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3905
      if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
3906
                               &else_def_stmt, &def, &dt))
3907
        return false;
3908
    }
3909
  else if (TREE_CODE (else_clause) != INTEGER_CST
3910
           && TREE_CODE (else_clause) != REAL_CST
3911
           && TREE_CODE (else_clause) != FIXED_CST)
3912
    return false;
3913
 
3914
 
3915
  vec_mode = TYPE_MODE (vectype);
3916
 
3917
  if (!vec_stmt)
3918
    {
3919
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3920
      return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
3921
    }
3922
 
3923
  /* Transform */
3924
 
3925
  /* Handle def.  */
3926
  scalar_dest = gimple_assign_lhs (stmt);
3927
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
3928
 
3929
  /* Handle cond expr.  */
3930
  vec_cond_lhs =
3931
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3932
  vec_cond_rhs =
3933
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3934
  if (reduc_index == 1)
3935
    vec_then_clause = reduc_def;
3936
  else
3937
    vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3938
  if (reduc_index == 2)
3939
    vec_else_clause = reduc_def;
3940
  else
3941
    vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3942
 
3943
  /* Arguments are ready. Create the new vector stmt.  */
3944
  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3945
                        vec_cond_lhs, vec_cond_rhs);
3946
  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3947
                          vec_compare, vec_then_clause, vec_else_clause);
3948
 
3949
  *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
3950
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
3951
  gimple_assign_set_lhs (*vec_stmt, new_temp);
3952
  vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
3953
 
3954
  return true;
3955
}
3956
 
3957
 
3958
/* Make sure the statement is vectorizable.  */
3959
 
3960
bool
3961
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
3962
{
3963
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3964
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3965
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
3966
  bool ok;
3967
  HOST_WIDE_INT dummy;
3968
  tree scalar_type, vectype;
3969
 
3970
  if (vect_print_dump_info (REPORT_DETAILS))
3971
    {
3972
      fprintf (vect_dump, "==> examining statement: ");
3973
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3974
    }
3975
 
3976
  if (gimple_has_volatile_ops (stmt))
3977
    {
3978
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3979
        fprintf (vect_dump, "not vectorized: stmt has volatile operands");
3980
 
3981
      return false;
3982
    }
3983
 
3984
  /* Skip stmts that do not need to be vectorized. In loops this is expected
3985
     to include:
3986
     - the COND_EXPR which is the loop exit condition
3987
     - any LABEL_EXPRs in the loop
3988
     - computations that are used only for array indexing or loop control.
3989
     In basic blocks we only analyze statements that are a part of some SLP
3990
     instance, therefore, all the statements are relevant.  */
3991
 
3992
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
3993
      && !STMT_VINFO_LIVE_P (stmt_info))
3994
    {
3995
      if (vect_print_dump_info (REPORT_DETAILS))
3996
        fprintf (vect_dump, "irrelevant.");
3997
 
3998
      return true;
3999
    }
4000
 
4001
  switch (STMT_VINFO_DEF_TYPE (stmt_info))
4002
    {
4003
      case vect_internal_def:
4004
        break;
4005
 
4006
      case vect_reduction_def:
4007
      case vect_nested_cycle:
4008
         gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
4009
                     || relevance == vect_used_in_outer_by_reduction
4010
                     || relevance == vect_unused_in_scope));
4011
         break;
4012
 
4013
      case vect_induction_def:
4014
      case vect_constant_def:
4015
      case vect_external_def:
4016
      case vect_unknown_def_type:
4017
      default:
4018
        gcc_unreachable ();
4019
    }
4020
 
4021
  if (bb_vinfo)
4022
    {
4023
      gcc_assert (PURE_SLP_STMT (stmt_info));
4024
 
4025
      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
4026
      if (vect_print_dump_info (REPORT_DETAILS))
4027
        {
4028
          fprintf (vect_dump, "get vectype for scalar type:  ");
4029
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4030
        }
4031
 
4032
      vectype = get_vectype_for_scalar_type (scalar_type);
4033
      if (!vectype)
4034
        {
4035
          if (vect_print_dump_info (REPORT_DETAILS))
4036
            {
4037
               fprintf (vect_dump, "not SLPed: unsupported data-type ");
4038
               print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4039
            }
4040
          return false;
4041
        }
4042
 
4043
      if (vect_print_dump_info (REPORT_DETAILS))
4044
        {
4045
          fprintf (vect_dump, "vectype:  ");
4046
          print_generic_expr (vect_dump, vectype, TDF_SLIM);
4047
        }
4048
 
4049
      STMT_VINFO_VECTYPE (stmt_info) = vectype;
4050
   }
4051
 
4052
  if (STMT_VINFO_RELEVANT_P (stmt_info))
4053
    {
4054
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
4055
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
4056
      *need_to_vectorize = true;
4057
    }
4058
 
4059
   ok = true;
4060
   if (!bb_vinfo
4061
       && (STMT_VINFO_RELEVANT_P (stmt_info)
4062
           || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4063
      ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
4064
            || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
4065
            || vectorizable_conversion (stmt, NULL, NULL, NULL)
4066
            || vectorizable_operation (stmt, NULL, NULL, NULL)
4067
            || vectorizable_assignment (stmt, NULL, NULL, NULL)
4068
            || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
4069
            || vectorizable_call (stmt, NULL, NULL)
4070
            || vectorizable_store (stmt, NULL, NULL, NULL)
4071
            || vectorizable_reduction (stmt, NULL, NULL)
4072
            || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
4073
    else
4074
      {
4075
        if (bb_vinfo)
4076
          ok = (vectorizable_operation (stmt, NULL, NULL, node)
4077
                || vectorizable_assignment (stmt, NULL, NULL, node)
4078
                || vectorizable_load (stmt, NULL, NULL, node, NULL)
4079
                || vectorizable_store (stmt, NULL, NULL, node));
4080
      }
4081
 
4082
  if (!ok)
4083
    {
4084
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4085
        {
4086
          fprintf (vect_dump, "not vectorized: relevant stmt not ");
4087
          fprintf (vect_dump, "supported: ");
4088
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4089
        }
4090
 
4091
      return false;
4092
    }
4093
 
4094
  if (bb_vinfo)
4095
    return true;
4096
 
4097
  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
4098
      need extra handling, except for vectorizable reductions.  */
4099
  if (STMT_VINFO_LIVE_P (stmt_info)
4100
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4101
    ok = vectorizable_live_operation (stmt, NULL, NULL);
4102
 
4103
  if (!ok)
4104
    {
4105
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4106
        {
4107
          fprintf (vect_dump, "not vectorized: live stmt not ");
4108
          fprintf (vect_dump, "supported: ");
4109
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4110
        }
4111
 
4112
       return false;
4113
    }
4114
 
4115
  if (!PURE_SLP_STMT (stmt_info))
4116
    {
4117
      /* Groups of strided accesses whose size is not a power of 2 are not
4118
         yet vectorizable using loop-vectorization. Therefore, if this stmt
4119
         feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
4120
         loop-based vectorized), the loop cannot be vectorized.  */
4121
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4122
          && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4123
                                        DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4124
        {
4125
          if (vect_print_dump_info (REPORT_DETAILS))
4126
            {
4127
              fprintf (vect_dump, "not vectorized: the size of group "
4128
                                  "of strided accesses is not a power of 2");
4129
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4130
            }
4131
 
4132
          return false;
4133
        }
4134
    }
4135
 
4136
  return true;
4137
}
4138
 
4139
 
4140
/* Function vect_transform_stmt.
4141
 
4142
   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
4143
 
4144
bool
4145
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4146
                     bool *strided_store, slp_tree slp_node,
4147
                     slp_instance slp_node_instance)
4148
{
4149
  bool is_store = false;
4150
  gimple vec_stmt = NULL;
4151
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4152
  gimple orig_stmt_in_pattern;
4153
  bool done;
4154
 
4155
  switch (STMT_VINFO_TYPE (stmt_info))
4156
    {
4157
    case type_demotion_vec_info_type:
4158
      done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4159
      gcc_assert (done);
4160
      break;
4161
 
4162
    case type_promotion_vec_info_type:
4163
      done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4164
      gcc_assert (done);
4165
      break;
4166
 
4167
    case type_conversion_vec_info_type:
4168
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4169
      gcc_assert (done);
4170
      break;
4171
 
4172
    case induc_vec_info_type:
4173
      gcc_assert (!slp_node);
4174
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
4175
      gcc_assert (done);
4176
      break;
4177
 
4178
    case op_vec_info_type:
4179
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4180
      gcc_assert (done);
4181
      break;
4182
 
4183
    case assignment_vec_info_type:
4184
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4185
      gcc_assert (done);
4186
      break;
4187
 
4188
    case load_vec_info_type:
4189
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4190
                                slp_node_instance);
4191
      gcc_assert (done);
4192
      break;
4193
 
4194
    case store_vec_info_type:
4195
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4196
      gcc_assert (done);
4197
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4198
        {
4199
          /* In case of interleaving, the whole chain is vectorized when the
4200
             last store in the chain is reached. Store stmts before the last
4201
             one are skipped, and their vec_stmt_info shouldn't be freed
4202
             meanwhile.  */
4203
          *strided_store = true;
4204
          if (STMT_VINFO_VEC_STMT (stmt_info))
4205
            is_store = true;
4206
          }
4207
      else
4208
        is_store = true;
4209
      break;
4210
 
4211
    case condition_vec_info_type:
4212
      gcc_assert (!slp_node);
4213
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
4214
      gcc_assert (done);
4215
      break;
4216
 
4217
    case call_vec_info_type:
4218
      gcc_assert (!slp_node);
4219
      done = vectorizable_call (stmt, gsi, &vec_stmt);
4220
      break;
4221
 
4222
    case reduc_vec_info_type:
4223
      gcc_assert (!slp_node);
4224
      done = vectorizable_reduction (stmt, gsi, &vec_stmt);
4225
      gcc_assert (done);
4226
      break;
4227
 
4228
    default:
4229
      if (!STMT_VINFO_LIVE_P (stmt_info))
4230
        {
4231
          if (vect_print_dump_info (REPORT_DETAILS))
4232
            fprintf (vect_dump, "stmt not supported.");
4233
          gcc_unreachable ();
4234
        }
4235
    }
4236
 
4237
  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4238
     is being vectorized, but outside the immediately enclosing loop.  */
4239
  if (vec_stmt
4240
      && STMT_VINFO_LOOP_VINFO (stmt_info)
4241
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
4242
                                STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
4243
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4244
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4245
          || STMT_VINFO_RELEVANT (stmt_info) ==
4246
                                           vect_used_in_outer_by_reduction))
4247
    {
4248
      struct loop *innerloop = LOOP_VINFO_LOOP (
4249
                                STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
4250
      imm_use_iterator imm_iter;
4251
      use_operand_p use_p;
4252
      tree scalar_dest;
4253
      gimple exit_phi;
4254
 
4255
      if (vect_print_dump_info (REPORT_DETAILS))
4256
        fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4257
 
4258
       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
4259
        (to be used when vectorizing outer-loop stmts that use the DEF of
4260
        STMT).  */
4261
      if (gimple_code (stmt) == GIMPLE_PHI)
4262
        scalar_dest = PHI_RESULT (stmt);
4263
      else
4264
        scalar_dest = gimple_assign_lhs (stmt);
4265
 
4266
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4267
       {
4268
         if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4269
           {
4270
             exit_phi = USE_STMT (use_p);
4271
             STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4272
           }
4273
       }
4274
    }
4275
 
4276
  /* Handle stmts whose DEF is used outside the loop-nest that is
4277
     being vectorized.  */
4278
  if (STMT_VINFO_LIVE_P (stmt_info)
4279
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4280
    {
4281
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4282
      gcc_assert (done);
4283
    }
4284
 
4285
  if (vec_stmt)
4286
    {
4287
      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4288
      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4289
      if (orig_stmt_in_pattern)
4290
        {
4291
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4292
          /* STMT was inserted by the vectorizer to replace a computation idiom.
4293
             ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4294
             computed this idiom.  We need to record a pointer to VEC_STMT in
4295
             the stmt_info of ORIG_STMT_IN_PATTERN.  See more details in the
4296
             documentation of vect_pattern_recog.  */
4297
          if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4298
            {
4299
              gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4300
              STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4301
            }
4302
        }
4303
    }
4304
 
4305
  return is_store;
4306
}
4307
 
4308
 
4309
/* Remove a group of stores (for SLP or interleaving), free their
4310
   stmt_vec_info.  */
4311
 
4312
void
4313
vect_remove_stores (gimple first_stmt)
4314
{
4315
  gimple next = first_stmt;
4316
  gimple tmp;
4317
  gimple_stmt_iterator next_si;
4318
 
4319
  while (next)
4320
    {
4321
      /* Free the attached stmt_vec_info and remove the stmt.  */
4322
      next_si = gsi_for_stmt (next);
4323
      gsi_remove (&next_si, true);
4324
      tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
4325
      free_stmt_vec_info (next);
4326
      next = tmp;
4327
    }
4328
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = loop_vect;
  DR_GROUP_FIRST_DR (res) = NULL;
  DR_GROUP_NEXT_DR (res) = NULL;
  DR_GROUP_SIZE (res) = 0;
  DR_GROUP_STORE_COUNT (res) = 0;
  DR_GROUP_GAP (res) = 0;
  DR_GROUP_SAME_DR_STMT (res) = NULL;
  DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}
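
/* A minimal usage sketch (illustrative, with assumed context): analysis
   code typically pairs every statement with a fresh stmt_vec_info via
   set_vinfo_for_stmt, roughly

     for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
       set_vinfo_for_stmt (gsi_stmt (si),
                           new_stmt_vec_info (gsi_stmt (si), loop_vinfo,
                                              NULL));

   where BB, SI and LOOP_VINFO are assumed to come from the caller.  */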


/* Create the vector that maps statements to their stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}


/* Free the stmt_vec_info vector.  */

void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}
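
/* Sketch of the intended pairing (illustrative only): a vectorization
   pass brackets its use of stmt_vec_infos with

     init_stmt_vec_info_vec ();
     ... create and query stmt_vec_infos ...
     free_stmt_vec_info_vec ();

   which is the alternation the two asserts above enforce.  */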


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
    return NULL_TREE;

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    return NULL_TREE;

  /* If we'd build a vector type of elements whose mode precision doesn't
     match their type's precision, we'll get mismatched types on vector
     extracts via BIT_FIELD_REFs.  This effectively means we disable
     vectorization of bool and/or enum types in some languages.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
    return NULL_TREE;

  /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
     is expected.  */
  nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
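
/* Worked example (illustrative, assuming a target whose SIMD word is
   16 bytes): for a 4-byte 'int', nunits = 16 / 4 = 4, so the function
   returns a vector type equivalent to

     typedef int v4si __attribute__ ((vector_size (16)));

   i.e. a 4-unit vector in V4SImode.  Conversely, a C++ 'bool' with
   TYPE_PRECISION 1 but an 8-bit mode is rejected by the precision
   check above.  */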

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of a stmt in the loop or bb.
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}
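
/* Minimal calling sketch (illustrative only): an analysis routine would
   typically check each rhs operand like so, assuming OP and LOOP_VINFO
   come from the caller:

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;

     if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
       return false;

   On success, DT tells the caller how to materialize the vector operand
   (constant, external/invariant, or an internal def).  */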


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
   when vectorizing the operation, if available.  In this case,
   CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
                                tree *decl1, tree *decl2,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  bool ordered_p;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree type = gimple_expr_type (stmt);
  tree wide_vectype = get_vectype_for_scalar_type (type);
  enum tree_code c1, c2;

  /* The result of a vectorized widening operation usually requires two vectors
     (because the widened results do not fit in one vector).  The generated
     vector results would normally be expected to be generated in the same
     order as in the original scalar computation, i.e. if 8 results are
     generated in each vector iteration, they are to be organized as follows:
        vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening operation is
     used in a reduction computation only, the order doesn't matter (because
     when vectorizing a reduction we change the order of the computation).
     Some targets can take advantage of this and generate more efficient code.
     For example, targets like Altivec, which support widen_mult using a
     sequence of {mult_even,mult_odd}, generate the following vectors:
        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow changing the order
     of the computation in the inner-loop during outer-loop vectorization.  */

   if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
       && !nested_in_vect_loop_p (vect_loop, stmt))
     ordered_p = false;
   else
     ordered_p = true;

  if (!ordered_p
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_WIDEN_MULT_HI_EXPR;
          c2 = VEC_WIDEN_MULT_LO_EXPR;
        }
      else
        {
          c2 = VEC_WIDEN_MULT_HI_EXPR;
          c1 = VEC_WIDEN_MULT_LO_EXPR;
        }
      break;

    CASE_CONVERT:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_HI_EXPR;
          c2 = VEC_UNPACK_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_HI_EXPR;
          c1 = VEC_UNPACK_LO_EXPR;
        }
      break;

    case FLOAT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_FLOAT_HI_EXPR;
          c2 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_FLOAT_HI_EXPR;
          c1 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, type, optab_default);
      optab2 = optab_for_tree_code (c2, type, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
                                                       == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
       || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
    {
      int i;
      tree prev_type = vectype, intermediate_type;
      enum machine_mode intermediate_mode, prev_mode = vec_mode;
      optab optab3, optab4;

      if (!CONVERT_EXPR_CODE_P (code))
        return false;

      *code1 = c1;
      *code2 = c2;

      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
         intermediate steps in the promotion sequence.  We try
         MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
                                                     TYPE_UNSIGNED (prev_type));
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

          if (!optab3 || !optab4
              || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
                                                        == CODE_FOR_nothing
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
                                                        == CODE_FOR_nothing
              || insn_data[icode2].operand[0].mode != intermediate_mode
              || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
                                                        == CODE_FOR_nothing
              || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
                                                        == CODE_FOR_nothing)
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
              && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

       return false;
    }

  *code1 = c1;
  *code2 = c2;
  return true;
}
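
/* Illustrative call sequence (a sketch, not part of the original file):
   for a widening conversion such as char->int, where the target unpacks
   only one step at a time, a caller would do roughly

     int multi_step_cvt = 0;
     VEC (tree, heap) *interm_types = NULL;
     enum tree_code code1, code2;
     tree decl1, decl2;

     if (supportable_widening_operation (NOP_EXPR, stmt, vectype,
                                         &decl1, &decl2, &code1, &code2,
                                         &multi_step_cvt, &interm_types))
       ... emit multi_step_cvt + 1 rounds of CODE1/CODE2 unpacking ...

   For char->short->int, MULTI_STEP_CVT comes back as 1 and INTERM_TYPES
   holds the intermediate short vector type (per the function comment
   above).  STMT and VECTYPE are assumed to come from the caller.  */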
4826
 
4827
 
4828
/* Function supportable_narrowing_operation
4829
 
4830
   Check whether an operation represented by the code CODE is a
4831
   narrowing operation that is supported by the target platform in
4832
   vector form (i.e., when operating on arguments of type VECTYPE).
4833
 
4834
   Narrowing operations we currently support are NOP (CONVERT) and
4835
   FIX_TRUNC. This function checks if these operations are supported by
4836
   the target platform directly via vector tree-codes.
4837
 
4838
   Output:
4839
   - CODE1 is the code of a vector operation to be used when
4840
   vectorizing the operation, if available.
4841
   - MULTI_STEP_CVT determines the number of required intermediate steps in
4842
   case of multi-step conversion (like int->short->char - in that case
4843
   MULTI_STEP_CVT will be 1).
4844
   - INTERM_TYPES contains the intermediate type required to perform the
4845
   narrowing operation (short in the above example).   */
4846
 
4847
bool
4848
supportable_narrowing_operation (enum tree_code code,
4849
                                 const_gimple stmt, tree vectype,
4850
                                 enum tree_code *code1, int *multi_step_cvt,
4851
                                 VEC (tree, heap) **interm_types)
4852
{
4853
  enum machine_mode vec_mode;
4854
  enum insn_code icode1;
4855
  optab optab1, interm_optab;
4856
  tree type = gimple_expr_type (stmt);
4857
  tree narrow_vectype = get_vectype_for_scalar_type (type);
4858
  enum tree_code c1;
4859
  tree intermediate_type, prev_type;
4860
  int i;
4861
 
4862
  switch (code)
4863
    {
4864
    CASE_CONVERT:
4865
      c1 = VEC_PACK_TRUNC_EXPR;
4866
      break;
4867
 
4868
    case FIX_TRUNC_EXPR:
4869
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
4870
      break;
4871
 
4872
    case FLOAT_EXPR:
4873
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
4874
         tree code and optabs used for computing the operation.  */
4875
      return false;
4876
 
4877
    default:
4878
      gcc_unreachable ();
4879
    }
4880
 
4881
  if (code == FIX_TRUNC_EXPR)
4882
    /* The signedness is determined from output operand.  */
4883
    optab1 = optab_for_tree_code (c1, type, optab_default);
4884
  else
4885
    optab1 = optab_for_tree_code (c1, vectype, optab_default);
4886
 
4887
  if (!optab1)
4888
    return false;
4889
 
4890
  vec_mode = TYPE_MODE (vectype);
4891
  if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
4892
       == CODE_FOR_nothing)
4893
    return false;
4894
 
4895
  /* Check if it's a multi-step conversion that can be done using intermediate
4896
     types.  */
4897
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
4898
    {
4899
      enum machine_mode intermediate_mode, prev_mode = vec_mode;
4900
 
4901
      *code1 = c1;
4902
      prev_type = vectype;
4903
      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4904
         intermediate  steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
4905
         to get to NARROW_VECTYPE, and fail if we do not.  */
4906
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4907
      for (i = 0; i < 3; i++)
4908
        {
4909
          intermediate_mode = insn_data[icode1].operand[0].mode;
4910
          intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4911
                                                     TYPE_UNSIGNED (prev_type));
4912
          interm_optab = optab_for_tree_code (c1, intermediate_type,
4913
                                              optab_default);
4914
          if (!interm_optab
4915
              || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4916
                                                        == CODE_FOR_nothing
4917
              || insn_data[icode1].operand[0].mode != intermediate_mode
4918
              || (icode1
4919
                  = interm_optab->handlers[(int) intermediate_mode].insn_code)
4920
                 == CODE_FOR_nothing)
4921
            return false;
4922
 
4923
          VEC_quick_push (tree, *interm_types, intermediate_type);
4924
          (*multi_step_cvt)++;
4925
 
4926
          if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
4927
            return true;
4928
 
4929
          prev_type = intermediate_type;
4930
          prev_mode = intermediate_mode;
4931
        }
4932
 
4933
      return false;
4934
    }
4935
 
4936
  *code1 = c1;
4937
  return true;
4938
}
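
/* Illustrative call sequence (a sketch under the same assumptions as the
   widening example above): for a narrowing conversion such as int->char,
   where the target packs one step at a time, a caller would do roughly

     int multi_step_cvt = 0;
     VEC (tree, heap) *interm_types = NULL;
     enum tree_code code1;

     if (supportable_narrowing_operation (NOP_EXPR, stmt, vectype,
                                          &code1, &multi_step_cvt,
                                          &interm_types))
       ... emit multi_step_cvt + 1 rounds of CODE1 packing ...

   For int->short->char, MULTI_STEP_CVT comes back as 1 and INTERM_TYPES
   holds the intermediate short vector type.  STMT and VECTYPE are assumed
   to come from the caller.  */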
