OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [tree-vect-transform.c] - Blame information for rev 12

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
/* Transformation Utilities for Loop Vectorization.
2
   Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it under
8
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 2, or (at your option) any later
10
version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13
WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING.  If not, write to the Free
19
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20
02110-1301, USA.  */
21
 
22
#include "config.h"
23
#include "system.h"
24
#include "coretypes.h"
25
#include "tm.h"
26
#include "ggc.h"
27
#include "tree.h"
28
#include "target.h"
29
#include "rtl.h"
30
#include "basic-block.h"
31
#include "diagnostic.h"
32
#include "tree-flow.h"
33
#include "tree-dump.h"
34
#include "timevar.h"
35
#include "cfgloop.h"
36
#include "expr.h"
37
#include "optabs.h"
38
#include "recog.h"
39
#include "tree-data-ref.h"
40
#include "tree-chrec.h"
41
#include "tree-scalar-evolution.h"
42
#include "tree-vectorizer.h"
43
#include "langhooks.h"
44
#include "tree-pass.h"
45
#include "toplev.h"
46
#include "real.h"
47
 
48
/* Forward declarations of the static helpers defined in this file.  */

/* Utility functions for the code transformation.  */
static bool vect_transform_stmt (tree, block_stmt_iterator *);
static void vect_align_data_ref (tree);
static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
  (tree, block_stmt_iterator *, tree, tree *, bool);
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree, tree *);
static tree vect_init_vector (tree, tree);
static void vect_finish_stmt_generation
  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void update_vuses_to_preheader (tree, struct loop*);
static tree get_initial_def_for_reduction (tree, tree, tree *);

/* Utility function dealing with loop peeling (not peeling itself).  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
static void vect_do_peeling_for_loop_bound
  (loop_vec_info, tree *, struct loops *);
static int vect_min_worthwhile_factor (enum tree_code);
76
 
77
 
78
/* Function vect_get_new_vect_var.
79
 
80
   Returns a name for a new variable. The current naming scheme appends the
81
   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
82
   the name of vectorizer generated variables, and appends that to NAME if
83
   provided.  */
84
 
85
static tree
86
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
87
{
88
  const char *prefix;
89
  tree new_vect_var;
90
 
91
  switch (var_kind)
92
  {
93
  case vect_simple_var:
94
    prefix = "vect_";
95
    break;
96
  case vect_scalar_var:
97
    prefix = "stmp_";
98
    break;
99
  case vect_pointer_var:
100
    prefix = "vect_p";
101
    break;
102
  default:
103
    gcc_unreachable ();
104
  }
105
 
106
  if (name)
107
    new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
108
  else
109
    new_vect_var = create_tmp_var (type, prefix);
110
 
111
  return new_vect_var;
112
}
113
 
114
 
115
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional. If supplied, it is be added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
                                      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  /* Unshare the trees taken from the data-reference record, since they
     are folded into new expressions below.  */
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name = build_fold_indirect_ref (data_ref_base);
  tree ref = DR_REF (dr);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree vec_stmt;
  tree new_temp;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));

  /* Create base_offset */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_tmp_var (dest);
  /* Gimplify the offset computation; any statements it needs are
     appended to NEW_STMT_LIST for the caller to insert.  */
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
      add_referenced_tmp_var (tmp);
      /* OFFSET is scaled by DR_STEP before being added to the base
         offset (OFFSET counts iterations, not bytes).  */
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
                            DR_STEP (dr));
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
                                 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset */
  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
                           base_offset);

  /* addr_expr = addr_base */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  /* The SSA_NAME defined by this assignment is the function's result;
     it is installed as the LHS of VEC_STMT before the stmt is queued.  */
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return new_temp;
}
191
 
192
 
193
/* Function vect_align_data_ref.
194
 
195
   Handle misalignment of a memory accesses.
196
 
197
   FORNOW: Can't handle misaligned accesses.
198
   Make sure that the dataref is aligned.  */
199
 
200
static void
201
vect_align_data_ref (tree stmt)
202
{
203
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
204
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
205
 
206
  /* FORNOW: can't handle misaligned accesses;
207
             all accesses expected to be aligned.  */
208
  gcc_assert (aligned_access_p (dr));
209
}
210
 
211
 
212
/* Function vect_create_data_ref_ptr.

   Create a memory reference expression for vector access, to be used in a
   vector load/store stmt. The reference is based on a new pointer to vector
   type (vp).

   Input:
   1. STMT: a stmt that references memory. Expected to be of the form
         MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial addressed accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, return the initial pointer.  Otherwise, create
      a data-reference in the loop based on the new vector pointer vp.  This
      new data reference will by some means be updated each iteration of
      the loop.  Return the pointer vp'.

   FORNOW: handle only aligned and consecutive accesses.  */

static tree
vect_create_data_ref_ptr (tree stmt,
                          block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
                          tree offset, tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  base_name =  build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);


  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_MEMTAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) than a new type alias
     tag must be created with tag added to its may alias list.  */
  if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
    new_type_alias (vect_ptr, tag);
  else
    var_ann (vect_ptr)->type_mem_tag = tag;

  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);

  /** (3) Calculate the initial address the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  /* Inserting on the preheader edge must not split it into a new
     basic block (the preheader is expected to be available).  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      return vect_ptr_init;
    }
  else
    {
      block_stmt_iterator incr_bsi;
      bool insert_after;
      tree indx_before_incr, indx_after_incr;
      tree incr;

      /* Create an IV that advances the pointer by the vector size each
         iteration; CREATE_IV returns the values before and after the
         increment.  */
      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
      create_iv (vect_ptr_init,
                 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
                 NULL_TREE, loop, &incr_bsi, insert_after,
                 &indx_before_incr, &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      /* Attach vectorizer bookkeeping to the new increment stmt so later
         passes over the loop recognize it.  */
      set_stmt_info ((tree_ann_t)stmt_ann (incr),
                     new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);

      return indx_before_incr;
    }
}
368
 
369
 
370
/* Function vect_create_destination_var.
371
 
372
   Create a new temporary of type VECTYPE.  */
373
 
374
static tree
375
vect_create_destination_var (tree scalar_dest, tree vectype)
376
{
377
  tree vec_dest;
378
  const char *new_name;
379
  tree type;
380
  enum vect_var_kind kind;
381
 
382
  kind = vectype ? vect_simple_var : vect_scalar_var;
383
  type = vectype ? vectype : TREE_TYPE (scalar_dest);
384
 
385
  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
386
 
387
  new_name = get_name (scalar_dest);
388
  if (!new_name)
389
    new_name = "var_";
390
  vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
391
  add_referenced_tmp_var (vec_dest);
392
 
393
  return vec_dest;
394
}
395
 
396
 
397
/* Function vect_init_vector.
398
 
399
   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
400
   the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
401
   used in the vectorization of STMT.  */
402
 
403
static tree
404
vect_init_vector (tree stmt, tree vector_var)
405
{
406
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
407
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
408
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
409
  tree new_var;
410
  tree init_stmt;
411
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
412
  tree vec_oprnd;
413
  edge pe;
414
  tree new_temp;
415
  basic_block new_bb;
416
 
417
  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
418
  add_referenced_tmp_var (new_var);
419
 
420
  init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
421
  new_temp = make_ssa_name (new_var, init_stmt);
422
  TREE_OPERAND (init_stmt, 0) = new_temp;
423
 
424
  pe = loop_preheader_edge (loop);
425
  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
426
  gcc_assert (!new_bb);
427
 
428
  if (vect_print_dump_info (REPORT_DETAILS))
429
    {
430
      fprintf (vect_dump, "created new init_stmt: ");
431
      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
432
    }
433
 
434
  vec_oprnd = TREE_OPERAND (init_stmt, 0);
435
  return vec_oprnd;
436
}
437
 
438
 
439
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT. This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

static tree
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  /* Classify OP's definition; the vectorizer only reaches this point
     for operands it already analyzed, hence the assert.  */
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def =  ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt =  ");
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        /* Build the element list back-to-front so the resulting
           TREE_LIST is in element order.  */
        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vec_cst = build_vector (vectype, t);
        return vect_init_vector (stmt, vec_cst);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vectype, t);
        return vect_init_vector (stmt, vec_inv);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
        if (scalar_def)
          *scalar_def = def_stmt;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        vec_oprnd = TREE_OPERAND (vec_stmt, 0);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
     }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "induction - unsupported.");
        internal_error ("no support for induction"); /* FORNOW */
      }

    default:
      gcc_unreachable ();
    }
}
568
 
569
 
570
/* Function vect_finish_stmt_generation.
571
 
572
   Insert a new stmt.  */
573
 
574
static void
575
vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
576
{
577
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
578
 
579
  if (vect_print_dump_info (REPORT_DETAILS))
580
    {
581
      fprintf (vect_dump, "add new stmt: ");
582
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
583
    }
584
 
585
  /* Make sure bsi points to the stmt that is being vectorized.  */
586
  gcc_assert (stmt == bsi_stmt (*bsi));
587
 
588
#ifdef USE_MAPPED_LOCATION
589
  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
590
#else
591
  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
592
#endif
593
}
594
 
595
 
596
/* When defined, partial-result vectors are initialized with the operation's
   identity element in every lane, and the scalar init value is folded in at
   the loop epilog instead.  */
#define ADJUST_IN_EPILOG 1

/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   SCALAR_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for "ADJUST_IN_EPILOG" - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs. This vector will be used as the initial value of the
        vector of partial results.

   Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
     add:         [0,0,...,0,0]
     mult:        [1,1,...,1,1]
     min/max:     [init_val,init_val,..,init_val,init_val]
     bit and/or:  [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add:         [0,0,...,0,init_val]
     mult:        [1,1,...,1,init_val]
     min/max:     [init_val,init_val,...,init_val]
     bit and/or:  [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i=0;i<n;i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
   TODO: Use some cost-model to estimate which scheme is more profitable.
*/

static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  int nelements;
  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
  tree type = TREE_TYPE (init_val);
  tree def;
  tree vec, t = NULL_TREE;
  bool need_epilog_adjust;
  int i;

  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));

  switch (code)
  {
  case PLUS_EXPR:
    /* The identity for addition: integer 0 or real 0.0.  */
    if (INTEGRAL_TYPE_P (type))
      def = build_int_cst (type, 0);
    else
      def = build_real (type, dconst0);

#ifdef ADJUST_IN_EPILOG
    /* All the 'nunits' elements are set to 0. The final result will be
       adjusted by 'init_val' at the loop epilog.  */
    nelements = nunits;
    need_epilog_adjust = true;
#else
    /* 'nunits - 1' elements are set to 0; The last element is set to
        'init_val'.  No further adjustments at the epilog are needed.  */
    nelements = nunits - 1;
    need_epilog_adjust = false;
#endif
    break;

  case MIN_EXPR:
  case MAX_EXPR:
    /* INIT_VAL itself is the identity for min/max, so every lane gets
       it and no epilog adjustment is required.  */
    def = init_val;
    nelements = nunits;
    need_epilog_adjust = false;
    break;

  default:
    gcc_unreachable ();
  }

  /* Build the element list back-to-front so it ends up in lane order.  */
  for (i = nelements - 1; i >= 0; --i)
    t = tree_cons (NULL_TREE, def, t);

  if (nelements == nunits - 1)
    {
      /* Set the last element of the vector.  */
      t = tree_cons (NULL_TREE, init_val, t);
      nelements += 1;
    }
  gcc_assert (nelements == nunits);

  /* A VECTOR_CST requires constant elements; otherwise fall back to a
     CONSTRUCTOR.  */
  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
    vec = build_vector (vectype, t);
  else
    vec = build_constructor_from_list (vectype, t);

  if (!need_epilog_adjust)
    *scalar_def = NULL_TREE;
  else
    *scalar_def = init_val;

  return vect_init_vector (stmt, vec);
}
712
 
713
 
714
/* Function vect_create_epilog_for_reduction:

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
   into a single result, by applying the operation REDUC_CODE on the
   partial-results-vector. For this, we need to create a new phi node at the
   loop exit to preserve loop-closed form, as illustrated below.

   STMT is the original scalar reduction stmt that is being vectorized.
   REDUCTION_OP is the scalar reduction-variable.
   REDUCTION_PHI is the phi-node that carries the reduction computation.
   This function also sets the arguments for the REDUCTION_PHI:
   The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
   The loop-latch argument is VECT_DEF - the vector of partial sums.

     This function transforms this:

        loop:
          vec_def = phi <null, null>    # REDUCTION_PHI
          ....
          VECT_DEF = ...

        loop_exit:
          s_out0 = phi <s_loop>         # EXIT_PHI

          use <s_out0>
          use <s_out0>

     Into:

        loop:
          vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
          ....
          VECT_DEF = ...

        loop_exit:
          s_out0 = phi <s_loop>         # EXIT_PHI
          v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI

          v_out2 = reduc_expr <v_out1>
          s_out3 = extract_field <v_out2, 0>

          use <s_out3>
          use <s_out3>
*/

static void
vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
                                  enum tree_code reduc_code, tree reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  tree scalar_dest = TREE_OPERAND (stmt, 0);
  tree scalar_type = TREE_TYPE (scalar_dest);
  tree new_phi;
  block_stmt_iterator exit_bsi;
  tree vec_dest;
  tree new_temp;
  tree new_name;
  tree epilog_stmt;
  tree new_scalar_dest, exit_phi;
  tree bitsize, bitpos, bytesize;
  /* CODE is the tree code of the scalar reduction operation (e.g. PLUS_EXPR);
     it is reused below to combine partial results in the epilog.  */
  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
  tree scalar_initial_def;
  tree vec_initial_def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result;

  /*** 1. Create the reduction def-use cycle  ***/

  /* 1.1 set the loop-entry arg of the reduction-phi:  */
  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &scalar_initial_def);
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));


  /* 1.2 set the loop-latch arg for the reduction-phi:  */
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
    }


  /*** 2. Create epilog code ***/

  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
        v_out1 = phi <v_loop>  */

  exit_bb = loop->single_exit->dest;
  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
  SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);

  /* All epilog stmts are inserted in order at the top of the exit block.  */
  exit_bsi = bsi_start (exit_bb);


  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  bytesize = TYPE_SIZE_UNIT (scalar_type);

  /* 2.2 Create the reduction code.  Three strategies, in order of
     preference: (1) a single target reduction insn, (2) log2(VF) rounds of
     whole-vector shifts + vector ops, (3) fully scalar extract/combine.  */

  if (reduc_code < NUM_TREE_CODES)
    {
      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                        build1 (reduc_code, vectype,  PHI_RESULT (new_phi)));
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = 0;
      bool have_whole_vector_shift = true;
      /* Shadows the outer CODE with the same value (the scalar op).  */
      enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
      int bit_offset;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree vec_temp;

      /* The result of the reduction is expected to be at the least
         significant bits of the vector.  This is merely convention,
         as it's the extraction later that really matters, and that
         is also under our control.  */
      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
        shift_code = VEC_RSHIFT_EXPR;
      else
        have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        have_whole_vector_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype);
          if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
            have_whole_vector_shift = false;
        }

      if (have_whole_vector_shift)
        {
          /*** Case 2:
             for (offset = VS/2; offset >= element_size; offset/=2)
                {
                  Create:  va' = vec_shift <va, offset>
                  Create:  va = vop <va, va'>
                }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = PHI_RESULT (new_phi);

          for (bit_offset = vec_size_in_bits/2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              tree bitpos = size_int (bit_offset);

              epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
              build2 (shift_code, vectype, new_temp, bitpos));
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
              if (vect_print_dump_info (REPORT_DETAILS))
                print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);


              epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
              build2 (code, vectype, new_name, new_temp));
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
              if (vect_print_dump_info (REPORT_DETAILS))
                print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
            }

          extract_scalar_result = true;
        }
      else
        {
          tree rhs;

          /*** Case 3:
             Create:
             s = extract_field <v_out2, 0>
             for (offset=element_size; offset<vector_size; offset+=element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>
               }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_temp = PHI_RESULT (new_phi);
          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);

          /* Seed the scalar accumulator with element 0.  */
          rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                         bitsize_zero_node);

          BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
          epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
                                rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          TREE_OPERAND (epilog_stmt, 0) = new_temp;
          bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
          if (vect_print_dump_info (REPORT_DETAILS))
            print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);

          for (bit_offset = element_bitsize;
               bit_offset < vec_size_in_bits;
               bit_offset += element_bitsize)
            {
              tree bitpos = bitsize_int (bit_offset);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitpos);

              BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
              epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
                                    rhs);
              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
              if (vect_print_dump_info (REPORT_DETAILS))
                print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);


              epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
                                build2 (code, scalar_type, new_name, new_temp));
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              TREE_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
              if (vect_print_dump_info (REPORT_DETAILS))
                print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
            }

          /* Case 3 already produced a scalar in NEW_TEMP.  */
          extract_scalar_result = false;
        }
    }


  /* 2.3  Extract the final scalar result.  Create:
         s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      tree rhs;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      /* The result is in the low order bits.  */
      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                       TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
      if (vect_print_dump_info (REPORT_DETAILS))
        print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
    }


  /* 2.4 Adjust the final result by the initial value of the reduction
         variable. (when such adjustment is not needed, then
         'scalar_initial_def' is zero).

         Create:
         s_out = scalar_expr <s_out, scalar_initial_def>  */

  if (scalar_initial_def)
    {
      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
                      build2 (code, scalar_type, new_temp, scalar_initial_def));
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

      if (vect_print_dump_info (REPORT_DETAILS))
        print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
    }


  /* 2.5 Replace uses of s_out0 with uses of s_out3  */

  /* Find the loop-closed-use at the loop exit of the original
     scalar result.  (The reduction result is expected to have
     two immediate uses - one at the latch block, and one at the
     loop exit).  */
  /* NOTE(review): if no out-of-loop use exists, EXIT_PHI stays NULL and
     PHI_RESULT below would crash — presumably loop-closed SSA guarantees
     an exit phi here; confirm against the caller's invariants.  */
  exit_phi = NULL;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
    {
      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
        {
          exit_phi = USE_STMT (use_p);
          break;
        }
    }

  orig_name = PHI_RESULT (exit_phi);

  FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
    SET_USE (use_p, new_temp);
}
1056
 
1057
 
1058
/* Function vectorizable_reduction.
1059
 
1060
   Check if STMT performs a reduction operation that can be vectorized.
1061
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1062
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1063
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1064
 
1065
bool
1066
vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1067
{
1068
  tree vec_dest;
1069
  tree scalar_dest;
1070
  tree op0, op1;
1071
  tree loop_vec_def;
1072
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1073
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1074
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1075
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1076
  tree operation;
1077
  enum tree_code code, reduc_code = 0;
1078
  enum machine_mode vec_mode;
1079
  int op_type;
1080
  optab optab, reduc_optab;
1081
  tree new_temp;
1082
  tree def0, def1, def_stmt0, def_stmt1;
1083
  enum vect_def_type dt0, dt1;
1084
  tree new_phi;
1085
  tree scalar_type;
1086
  bool is_simple_use0;
1087
  bool is_simple_use1;
1088
 
1089
  /* Is vectorizable reduction?  */
1090
 
1091
  /* Not supportable if the reduction variable is used in the loop.  */
1092
  if (STMT_VINFO_RELEVANT_P (stmt_info))
1093
    return false;
1094
 
1095
  if (!STMT_VINFO_LIVE_P (stmt_info))
1096
    return false;
1097
 
1098
  /* Make sure it was already recognized as a reduction pattern.  */
1099
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1100
    return false;
1101
 
1102
  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1103
 
1104
  operation = TREE_OPERAND (stmt, 1);
1105
  code = TREE_CODE (operation);
1106
  op_type = TREE_CODE_LENGTH (code);
1107
 
1108
  if (op_type != binary_op)
1109
    return false;
1110
 
1111
  op0 = TREE_OPERAND (operation, 0);
1112
  op1 = TREE_OPERAND (operation, 1);
1113
  scalar_dest = TREE_OPERAND (stmt, 0);
1114
  scalar_type = TREE_TYPE (scalar_dest);
1115
 
1116
  /* Check the first operand. It is expected to be defined inside the loop.  */
1117
  is_simple_use0 =
1118
        vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
1119
  is_simple_use1 =
1120
        vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
1121
 
1122
  gcc_assert (is_simple_use0);
1123
  gcc_assert (is_simple_use1);
1124
  gcc_assert (dt0 == vect_loop_def);
1125
  gcc_assert (dt1 == vect_reduction_def);
1126
  gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
1127
  gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
1128
 
1129
  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
1130
   return false;
1131
 
1132
  /* Supportable by target?  */
1133
 
1134
  /* check support for the operation in the loop  */
1135
  optab = optab_for_tree_code (code, vectype);
1136
  if (!optab)
1137
    {
1138
      if (vect_print_dump_info (REPORT_DETAILS))
1139
        fprintf (vect_dump, "no optab.");
1140
      return false;
1141
    }
1142
  vec_mode = TYPE_MODE (vectype);
1143
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1144
    {
1145
      if (vect_print_dump_info (REPORT_DETAILS))
1146
        fprintf (vect_dump, "op not supported by target.");
1147
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1148
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1149
             < vect_min_worthwhile_factor (code))
1150
        return false;
1151
      if (vect_print_dump_info (REPORT_DETAILS))
1152
        fprintf (vect_dump, "proceeding using word mode.");
1153
    }
1154
 
1155
  /* Worthwhile without SIMD support?  */
1156
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1157
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1158
         < vect_min_worthwhile_factor (code))
1159
    {
1160
      if (vect_print_dump_info (REPORT_DETAILS))
1161
        fprintf (vect_dump, "not worthwhile without SIMD support.");
1162
      return false;
1163
    }
1164
 
1165
  /* check support for the epilog operation  */
1166
  if (!reduction_code_for_scalar_code (code, &reduc_code))
1167
    return false;
1168
  reduc_optab = optab_for_tree_code (reduc_code, vectype);
1169
  if (!reduc_optab)
1170
    {
1171
      if (vect_print_dump_info (REPORT_DETAILS))
1172
        fprintf (vect_dump, "no optab for reduction.");
1173
      reduc_code = NUM_TREE_CODES;
1174
    }
1175
  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1176
    {
1177
      if (vect_print_dump_info (REPORT_DETAILS))
1178
        fprintf (vect_dump, "reduc op not supported by target.");
1179
      reduc_code = NUM_TREE_CODES;
1180
    }
1181
 
1182
  if (!vec_stmt) /* transformation not required.  */
1183
    {
1184
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1185
      return true;
1186
    }
1187
 
1188
  /** Transform.  **/
1189
 
1190
  if (vect_print_dump_info (REPORT_DETAILS))
1191
    fprintf (vect_dump, "transform reduction.");
1192
 
1193
  /* Create the destination vector  */
1194
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1195
 
1196
 
1197
  /* Create the reduction-phi that defines the reduction-operand.  */
1198
  new_phi = create_phi_node (vec_dest, loop->header);
1199
 
1200
 
1201
  /* Prepare the operand that is defined inside the loop body  */
1202
  loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
1203
 
1204
  /* Create the vectorized operation that computes the partial results  */
1205
  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1206
                build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
1207
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1208
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1209
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1210
 
1211
 
1212
  /* Finalize the reduction-phi (set it's arguments) and create the
1213
     epilog reduction code.  */
1214
  vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
1215
  return true;
1216
}
1217
 
1218
 
1219
/* Function vectorizable_assignment.
1220
 
1221
   Check if STMT performs an assignment (copy) that can be vectorized.
1222
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1223
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1224
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1225
 
1226
bool
1227
vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1228
{
1229
  tree vec_dest;
1230
  tree scalar_dest;
1231
  tree op;
1232
  tree vec_oprnd;
1233
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1234
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1235
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1236
  tree new_temp;
1237
  tree def, def_stmt;
1238
  enum vect_def_type dt;
1239
 
1240
  /* Is vectorizable assignment?  */
1241
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1242
    return false;
1243
 
1244
  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1245
 
1246
  if (TREE_CODE (stmt) != MODIFY_EXPR)
1247
    return false;
1248
 
1249
  scalar_dest = TREE_OPERAND (stmt, 0);
1250
  if (TREE_CODE (scalar_dest) != SSA_NAME)
1251
    return false;
1252
 
1253
  op = TREE_OPERAND (stmt, 1);
1254
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1255
    {
1256
      if (vect_print_dump_info (REPORT_DETAILS))
1257
        fprintf (vect_dump, "use not simple.");
1258
      return false;
1259
    }
1260
 
1261
  if (!vec_stmt) /* transformation not required.  */
1262
    {
1263
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1264
      return true;
1265
    }
1266
 
1267
  /** Transform.  **/
1268
  if (vect_print_dump_info (REPORT_DETAILS))
1269
    fprintf (vect_dump, "transform assignment.");
1270
 
1271
  /* Handle def.  */
1272
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1273
 
1274
  /* Handle use.  */
1275
  op = TREE_OPERAND (stmt, 1);
1276
  vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1277
 
1278
  /* Arguments are ready. create the new vector stmt.  */
1279
  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1280
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1281
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1282
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1283
 
1284
  return true;
1285
}
1286
 
1287
 
1288
/* Function vect_min_worthwhile_factor.
1289
 
1290
   For a loop where we could vectorize the operation indicated by CODE,
1291
   return the minimum vectorization factor that makes it worthwhile
1292
   to use generic vectors.  */
1293
static int
1294
vect_min_worthwhile_factor (enum tree_code code)
1295
{
1296
  switch (code)
1297
    {
1298
    case PLUS_EXPR:
1299
    case MINUS_EXPR:
1300
    case NEGATE_EXPR:
1301
      return 4;
1302
 
1303
    case BIT_AND_EXPR:
1304
    case BIT_IOR_EXPR:
1305
    case BIT_XOR_EXPR:
1306
    case BIT_NOT_EXPR:
1307
      return 2;
1308
 
1309
    default:
1310
      return INT_MAX;
1311
    }
1312
}
1313
 
1314
 
1315
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def, def_stmt;
  /* DT holds the def-type of the LAST operand examined by the loop below;
     the shift checks further down rely on this being the shift amount.  */
  enum vect_def_type dt;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* All operands must have defs the vectorizer understands.  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab->handlers[(int) vec_mode].insn_code;
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Word-mode emulation is acceptable only if the vectorization
         factor makes it profitable.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
    {
      /* FORNOW: not yet supported.  */
      if (!VECTOR_MODE_P (vec_mode))
        return false;

      /* Invariant argument is needed for a vector shift
         by a scalar shift operand.  DT here is the def-type of the
         shift amount (last operand checked above).  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (! (VECTOR_MODE_P (optab_op2_mode)
             || dt == vect_constant_def
             || dt == vect_invariant_def))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "operand mode requires invariant argument.");
          return false;
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);

      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
        {
          /* Vector shl and shr insn patterns can be defined with
             scalar operand 2 (shift operand).  In this case, use
             constant or loop invariant op1 directly, without
             extending it to vector mode first.  */

          optab_op2_mode = insn_data[icode].operand[2].mode;
          if (!VECTOR_MODE_P (optab_op2_mode))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "operand 1 using scalar mode.");
              vec_oprnd1 = op1;
            }
        }

      /* Otherwise widen op1 into a vector like any other operand.  */
      if (!vec_oprnd1)
        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
    }

  /* Arguments are ready. create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
1496
 
1497
 
1498
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_cheme;
  ssa_op_iter iter;
  tree def, def_stmt;
  enum vect_def_type dt;

  /* Is vectorizable store? */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  /* The destination must be a memory reference (array or pointer).  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF)
    return false;

  /* The stored value must have a def the vectorizer understands.  */
  op = TREE_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  vec_mode = TYPE_MODE (vectype);
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;


  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store");

  /* Only aligned stores are handled at this point ("cheme" is a
     historical typo for "scheme" kept throughout this file).  */
  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);
  gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */

  /* Handle use - get the vectorized def from the defining stmt.  */
  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);

  /* Handle def.  */
  /* FORNOW: make sure the data reference is aligned.  */
  vect_align_data_ref (stmt);
  data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
  data_ref = build_fold_indirect_ref (data_ref);

  /* Arguments are ready. create the new vector stmt.  */
  *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  /* Copy the V_MAY_DEFS representing the aliasing of the original array
     element's definition to the vector's definition then update the
     defining statement.  The original is being deleted so the same
     SSA_NAMEs can be used.  */
  copy_virtual_operands (*vec_stmt, stmt);

  FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
    {
      SSA_NAME_DEF_STMT (def) = *vec_stmt;

      /* If this virtual def has a use outside the loop and a loop peel is
         performed then the def may be renamed by the peel.  Mark it for
         renaming so the later use will also be renamed.  */
      mark_sym_for_renaming (SSA_NAME_VAR (def));
    }

  return true;
}
1598
 
1599
 
1600
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   Two code-generation schemes are handled here: a plain (possibly
   misaligned-but-supported) vector load, and the software-pipelined
   realign-load scheme that combines two aligned loads with a
   REALIGN_LOAD_EXPR.  */

bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load? */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* The load must be the rhs of a MODIFY_EXPR into an SSA name, and the
     rhs must be a memory reference recorded as a data-ref by the
     analysis phase.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    {
      /* Create:
         p = initial_addr;
         indx = 0;
         loop {
           vec_dest = *(p);
           indx = indx + 1;
         }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
        data_ref = build_fold_indirect_ref (data_ref);
      else
        {
          /* Misaligned but the target supports it: emit a
             MISALIGNED_INDIRECT_REF carrying the misalignment in bits
             (size_zero_node when the misalignment is unknown).  */
          int mis = DR_MISALIGNMENT (dr);
          tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
          tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
          data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
        }
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      /* The vector load reads the same memory as the scalar load, so it
         inherits the scalar stmt's virtual operands.  */
      copy_virtual_operands (new_stmt, stmt);
    }
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Create:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         magic = have_builtin ? builtin_result : initial_address;
         indx = 0;
         loop {
           p2' = p2 + indx * vectype_size
           lsq = *(floor(p2'))
           vec_dest = realign_load (msq, lsq, magic)
           indx = indx + 1;
           msq = lsq;
         }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
                                           &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      /* The preheader load must not require splitting the edge.  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);
      copy_virtual_operands (new_stmt, stmt);
      /* The copied VUSEs name in-loop SSA versions; rewrite them to the
         versions live on entry to the loop.  */
      update_vuses_to_preheader (new_stmt, loop);


      /* <2> Create lsq = *(floor(p2')) in the loop  */
      offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);
      copy_virtual_operands (new_stmt, stmt);


      /* <3> Compute the "magic" third operand of the realign load.  */
      if (targetm.vectorize.builtin_mask_for_load)
        {
          /* Create permutation mask, if required, in loop preheader.  */
          tree builtin_decl;
          params = build_tree_list (NULL_TREE, init_addr);
          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          builtin_decl = targetm.vectorize.builtin_mask_for_load ();
          new_stmt = build_function_call_expr (builtin_decl, params);
          new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          TREE_OPERAND (new_stmt, 0) = new_temp;
          new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
          magic = TREE_OPERAND (new_stmt, 0);

          /* The result of the CALL_EXPR to this builtin is determined from
             the value of the parameter and no global variables are touched
             which makes the builtin a "const" function.  Requiring the
             builtin to have the "const" attribute makes it unnecessary
             to call mark_call_clobbered.  */
          gcc_assert (TREE_READONLY (builtin_decl));
        }
      else
        {
          /* Use current address instead of init_addr for reduced reg pressure.
           */
          magic = dataref_ptr;
        }


      /* <4> Create msq = phi <msq_init, lsq> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));


      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else
    gcc_unreachable ();

  /* Return the last stmt generated in the loop body; it defines the
     final vectorized value of the load.  */
  *vec_stmt = new_stmt;
  return true;
}
1818
 
1819
 
1820
/* Function vectorizable_live_operation.
1821
 
1822
   STMT computes a value that is used outside the loop. Check if
1823
   it can be supported.  */
1824
 
1825
bool
1826
vectorizable_live_operation (tree stmt,
1827
                             block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1828
                             tree *vec_stmt ATTRIBUTE_UNUSED)
1829
{
1830
  tree operation;
1831
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1832
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1833
  int i;
1834
  enum tree_code code;
1835
  int op_type;
1836
  tree op;
1837
  tree def, def_stmt;
1838
  enum vect_def_type dt;
1839
 
1840
  if (!STMT_VINFO_LIVE_P (stmt_info))
1841
    return false;
1842
 
1843
  if (TREE_CODE (stmt) != MODIFY_EXPR)
1844
    return false;
1845
 
1846
  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1847
    return false;
1848
 
1849
  operation = TREE_OPERAND (stmt, 1);
1850
  code = TREE_CODE (operation);
1851
 
1852
  op_type = TREE_CODE_LENGTH (code);
1853
 
1854
  /* FORNOW: support only if all uses are invariant. This means
1855
     that the scalar operations can remain in place, unvectorized.
1856
     The original last scalar value that they compute will be used.  */
1857
 
1858
  for (i = 0; i < op_type; i++)
1859
    {
1860
      op = TREE_OPERAND (operation, i);
1861
      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1862
        {
1863
          if (vect_print_dump_info (REPORT_DETAILS))
1864
            fprintf (vect_dump, "use not simple.");
1865
          return false;
1866
        }
1867
 
1868
      if (dt != vect_invariant_def && dt != vect_constant_def)
1869
        return false;
1870
    }
1871
 
1872
  /* No transformation is required for the cases we currently support.  */
1873
  return true;
1874
}
1875
 
1876
 
1877
/* Function vect_is_simple_cond.
1878
 
1879
   Input:
1880
   LOOP - the loop that is being vectorized.
1881
   COND - Condition that is checked for simple use.
1882
 
1883
   Returns whether a COND can be vectorized.  Checks whether
1884
   condition operands are supportable using vec_is_simple_use.  */
1885
 
1886
static bool
1887
vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1888
{
1889
  tree lhs, rhs;
1890
  tree def;
1891
  enum vect_def_type dt;
1892
 
1893
  if (!COMPARISON_CLASS_P (cond))
1894
    return false;
1895
 
1896
  lhs = TREE_OPERAND (cond, 0);
1897
  rhs = TREE_OPERAND (cond, 1);
1898
 
1899
  if (TREE_CODE (lhs) == SSA_NAME)
1900
    {
1901
      tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1902
      if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1903
        return false;
1904
    }
1905
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1906
    return false;
1907
 
1908
  if (TREE_CODE (rhs) == SSA_NAME)
1909
    {
1910
      tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1911
      if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1912
        return false;
1913
    }
1914
  else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST)
1915
    return false;
1916
 
1917
  return true;
1918
}
1919
 
1920
/* vectorizable_condition.
1921
 
1922
   Check if STMT is conditional modify expression that can be vectorized.
1923
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1924
   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
1925
   at BSI.
1926
 
1927
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1928
 
1929
bool
1930
vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1931
{
1932
  tree scalar_dest = NULL_TREE;
1933
  tree vec_dest = NULL_TREE;
1934
  tree op = NULL_TREE;
1935
  tree cond_expr, then_clause, else_clause;
1936
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1937
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1938
  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1939
  tree vec_compare, vec_cond_expr;
1940
  tree new_temp;
1941
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1942
  enum machine_mode vec_mode;
1943
  tree def;
1944
  enum vect_def_type dt;
1945
 
1946
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1947
    return false;
1948
 
1949
  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1950
 
1951
  if (STMT_VINFO_LIVE_P (stmt_info))
1952
    {
1953
      /* FORNOW: not yet supported.  */
1954
      if (vect_print_dump_info (REPORT_DETAILS))
1955
        fprintf (vect_dump, "value used after loop.");
1956
      return false;
1957
    }
1958
 
1959
  if (TREE_CODE (stmt) != MODIFY_EXPR)
1960
    return false;
1961
 
1962
  op = TREE_OPERAND (stmt, 1);
1963
 
1964
  if (TREE_CODE (op) != COND_EXPR)
1965
    return false;
1966
 
1967
  cond_expr = TREE_OPERAND (op, 0);
1968
  then_clause = TREE_OPERAND (op, 1);
1969
  else_clause = TREE_OPERAND (op, 2);
1970
 
1971
  /* We do not handle two different vector types for the condition
1972
     and the values.  */
1973
  if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
1974
    return false;
1975
 
1976
  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
1977
    return false;
1978
 
1979
  if (TREE_CODE (then_clause) == SSA_NAME)
1980
    {
1981
      tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
1982
      if (!vect_is_simple_use (then_clause, loop_vinfo,
1983
                               &then_def_stmt, &def, &dt))
1984
        return false;
1985
    }
1986
  else if (TREE_CODE (then_clause) != INTEGER_CST
1987
           && TREE_CODE (then_clause) != REAL_CST)
1988
    return false;
1989
 
1990
  if (TREE_CODE (else_clause) == SSA_NAME)
1991
    {
1992
      tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
1993
      if (!vect_is_simple_use (else_clause, loop_vinfo,
1994
                               &else_def_stmt, &def, &dt))
1995
        return false;
1996
    }
1997
  else if (TREE_CODE (else_clause) != INTEGER_CST
1998
           && TREE_CODE (else_clause) != REAL_CST)
1999
    return false;
2000
 
2001
 
2002
  vec_mode = TYPE_MODE (vectype);
2003
 
2004
  if (!vec_stmt)
2005
    {
2006
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2007
      return expand_vec_cond_expr_p (op, vec_mode);
2008
    }
2009
 
2010
  /* Transform */
2011
 
2012
  /* Handle def.  */
2013
  scalar_dest = TREE_OPERAND (stmt, 0);
2014
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
2015
 
2016
  /* Handle cond expr.  */
2017
  vec_cond_lhs =
2018
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2019
  vec_cond_rhs =
2020
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2021
  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2022
  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2023
 
2024
  /* Arguments are ready. create the new vector stmt.  */
2025
  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2026
                        vec_cond_lhs, vec_cond_rhs);
2027
  vec_cond_expr = build (VEC_COND_EXPR, vectype,
2028
                         vec_compare, vec_then_clause, vec_else_clause);
2029
 
2030
  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2031
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
2032
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
2033
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2034
 
2035
  return true;
2036
}
2037
 
2038
/* Function vect_transform_stmt.
2039
 
2040
   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2041
 
2042
bool
2043
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2044
{
2045
  bool is_store = false;
2046
  tree vec_stmt = NULL_TREE;
2047
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2048
  bool done;
2049
 
2050
  if (STMT_VINFO_RELEVANT_P (stmt_info))
2051
    {
2052
      switch (STMT_VINFO_TYPE (stmt_info))
2053
      {
2054
      case op_vec_info_type:
2055
        done = vectorizable_operation (stmt, bsi, &vec_stmt);
2056
        gcc_assert (done);
2057
        break;
2058
 
2059
      case assignment_vec_info_type:
2060
        done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2061
        gcc_assert (done);
2062
        break;
2063
 
2064
      case load_vec_info_type:
2065
        done = vectorizable_load (stmt, bsi, &vec_stmt);
2066
        gcc_assert (done);
2067
        break;
2068
 
2069
      case store_vec_info_type:
2070
        done = vectorizable_store (stmt, bsi, &vec_stmt);
2071
        gcc_assert (done);
2072
        is_store = true;
2073
        break;
2074
 
2075
      case condition_vec_info_type:
2076
        done = vectorizable_condition (stmt, bsi, &vec_stmt);
2077
        gcc_assert (done);
2078
        break;
2079
 
2080
      default:
2081
        if (vect_print_dump_info (REPORT_DETAILS))
2082
          fprintf (vect_dump, "stmt not supported.");
2083
        gcc_unreachable ();
2084
      }
2085
 
2086
      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2087
    }
2088
 
2089
  if (STMT_VINFO_LIVE_P (stmt_info))
2090
    {
2091
      switch (STMT_VINFO_TYPE (stmt_info))
2092
      {
2093
      case reduc_vec_info_type:
2094
        done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2095
        gcc_assert (done);
2096
        break;
2097
 
2098
      default:
2099
        done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2100
        gcc_assert (done);
2101
      }
2102
 
2103
      if (vec_stmt)
2104
        {
2105
          gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2106
          STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2107
        }
2108
    }
2109
 
2110
  return is_store;
2111
}
2112
 
2113
 
2114
/* This function builds ni_name = number of iterations loop executes
2115
   on the loop preheader.  */
2116
 
2117
static tree
2118
vect_build_loop_niters (loop_vec_info loop_vinfo)
2119
{
2120
  tree ni_name, stmt, var;
2121
  edge pe;
2122
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2123
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2124
 
2125
  var = create_tmp_var (TREE_TYPE (ni), "niters");
2126
  add_referenced_tmp_var (var);
2127
  ni_name = force_gimple_operand (ni, &stmt, false, var);
2128
 
2129
  pe = loop_preheader_edge (loop);
2130
  if (stmt)
2131
    {
2132
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2133
      gcc_assert (!new_bb);
2134
    }
2135
 
2136
  return ni_name;
2137
}
2138
 
2139
 
2140
/* This function generates the following statements:

 ni_name = number of iterations loop executes
 ratio = ni_name / vf
 ratio_mult_vf_name = ratio * vf

 and places them at the loop preheader edge.  The division and
 multiplication by VF are emitted as shifts, so VF must be a power of
 two (exact_log2 below).  The three values are returned through the
 *_PTR output parameters.  */

static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
                                 tree *ni_name_ptr,
                                 tree *ratio_mult_vf_name_ptr,
                                 tree *ratio_name_ptr)
{

  edge pe;
  basic_block new_bb;
  tree stmt, ni_name;
  tree var;
  tree ratio_name;
  tree ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree log_vf;

  pe = loop_preheader_edge (loop);

  /* Generate temporary variable that contains
     number of iterations loop executes.  */

  ni_name = vect_build_loop_niters (loop_vinfo);
  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));

  /* Create: ratio = ni >> log2(vf) */

  var = create_tmp_var (TREE_TYPE (ni), "bnd");
  add_referenced_tmp_var (var);
  ratio_name = make_ssa_name (var, NULL_TREE);
  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
           build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
  /* Wire the new SSA name to its defining statement by hand, since the
     stmt is built outside of any bsi-based insertion helper.  */
  SSA_NAME_DEF_STMT (ratio_name) = stmt;

  pe = loop_preheader_edge (loop);
  /* Insertion on the preheader edge must not split it.  */
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  gcc_assert (!new_bb);

  /* Create: ratio_mult_vf = ratio << log2 (vf).  */

  var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
  add_referenced_tmp_var (var);
  ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
           build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
  SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  gcc_assert (!new_bb);

  /* Return the three generated values to the caller.  */
  *ni_name_ptr = ni_name;
  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
  *ratio_name_ptr = ratio_name;

  return;
}
2206
 
2207
 
2208
/* Function update_vuses_to_preheader.
2209
 
2210
   Input:
2211
   STMT - a statement with potential VUSEs.
2212
   LOOP - the loop whose preheader will contain STMT.
2213
 
2214
   It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2215
   appears to be defined in a V_MAY_DEF in another statement in a loop.
2216
   One such case is when the VUSE is at the dereference of a __restricted__
2217
   pointer in a load and the V_MAY_DEF is at the dereference of a different
2218
   __restricted__ pointer in a store.  Vectorization may result in
2219
   copy_virtual_uses being called to copy the problematic VUSE to a new
2220
   statement that is being inserted in the loop preheader.  This procedure
2221
   is called to change the SSA_NAME in the new statement's VUSE from the
2222
   SSA_NAME updated in the loop to the related SSA_NAME available on the
2223
   path entering the loop.
2224
 
2225
   When this function is called, we have the following situation:
2226
 
2227
        # vuse <name1>
2228
        S1: vload
2229
    do {
2230
        # name1 = phi < name0 , name2>
2231
 
2232
        # vuse <name1>
2233
        S2: vload
2234
 
2235
        # name2 = vdef <name1>
2236
        S3: vstore
2237
 
2238
    }while...
2239
 
2240
   Stmt S1 was created in the loop preheader block as part of misaligned-load
2241
   handling. This function fixes the name of the vuse of S1 from 'name1' to
2242
   'name0'.  */
2243
 
2244
static void
2245
update_vuses_to_preheader (tree stmt, struct loop *loop)
2246
{
2247
  basic_block header_bb = loop->header;
2248
  edge preheader_e = loop_preheader_edge (loop);
2249
  ssa_op_iter iter;
2250
  use_operand_p use_p;
2251
 
2252
  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2253
    {
2254
      tree ssa_name = USE_FROM_PTR (use_p);
2255
      tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2256
      tree name_var = SSA_NAME_VAR (ssa_name);
2257
      basic_block bb = bb_for_stmt (def_stmt);
2258
 
2259
      /* For a use before any definitions, def_stmt is a NOP_EXPR.  */
2260
      if (!IS_EMPTY_STMT (def_stmt)
2261
          && flow_bb_inside_loop_p (loop, bb))
2262
        {
2263
          /* If the block containing the statement defining the SSA_NAME
2264
             is in the loop then it's necessary to find the definition
2265
             outside the loop using the PHI nodes of the header.  */
2266
          tree phi;
2267
          bool updated = false;
2268
 
2269
          for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2270
            {
2271
              if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2272
                {
2273
                  SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2274
                  updated = true;
2275
                  break;
2276
                }
2277
            }
2278
          gcc_assert (updated);
2279
        }
2280
    }
2281
}
2282
 
2283
 
2284
/*   Function vect_update_ivs_after_vectorizer.

     "Advance" the induction variables of LOOP to the value they should take
     after the execution of LOOP.  This is currently necessary because the
     vectorizer does not handle induction variables that are used after the
     loop.  Such a situation occurs when the last iterations of LOOP are
     peeled, because:
     1. We introduced new uses after LOOP for IVs that were not originally used
        after LOOP: the IVs of LOOP are now used by an epilog loop.
     2. LOOP is going to be vectorized; this means that it will iterate N/VF
        times, whereas the loop IVs should be bumped N times.

     Input:
     - LOOP - a loop that is going to be vectorized. The last few iterations
              of LOOP were peeled.
     - NITERS - the number of iterations that LOOP executes (before it is
                vectorized). i.e, the number of times the ivs should be bumped.
     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
                  coming out from LOOP on which there are uses of the LOOP ivs
                  (this is the path from LOOP->exit to epilog_loop->preheader).

                  The new definitions of the ivs are placed in LOOP->exit.
                  The phi args associated with the edge UPDATE_E in the bb
                  UPDATE_E->dest are updated accordingly.

     Assumption 1: Like the rest of the vectorizer, this function assumes
     a single loop exit that has a single predecessor.

     Assumption 2: The phi nodes in the LOOP header and in update_bb are
     organized in the same order.

     Assumption 3: The access function of the ivs is simple enough (see
     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
     coming out of LOOP on which the ivs of LOOP are used (this is the path
     that leads to the epilog loop; other paths skip the epilog loop).  This
     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
     needs to have its phis updated.
 */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
                                  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = loop->single_exit->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header PHIs and the update_bb PHIs in lockstep
     (Assumption 2: both lists are in the same order).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Derive the IV's initial value and per-iteration step from its
         scalar evolution.  */
      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
         unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
                                                               loop->num));

      /* ni = init + niters * step: the value of the IV after NITERS
         iterations.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
                  build2 (MULT_EXPR, TREE_TYPE (niters),
                       niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_tmp_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
        bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
    }
}
2404
 
2405
 
2406
/* Function vect_do_peeling_for_loop_bound
2407
 
2408
   Peel the last iterations of the loop represented by LOOP_VINFO.
2409
   The peeled iterations form a new epilog loop.  Given that the loop now
2410
   iterates NITERS times, the new epilog loop iterates
2411
   NITERS % VECTORIZATION_FACTOR times.
2412
 
2413
   The original loop will later be made to iterate
2414
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
2415
 
2416
static void
2417
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2418
                                struct loops *loops)
2419
{
2420
  tree ni_name, ratio_mult_vf_name;
2421
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2422
  struct loop *new_loop;
2423
  edge update_e;
2424
  basic_block preheader;
2425
  int loop_num;
2426
 
2427
  if (vect_print_dump_info (REPORT_DETAILS))
2428
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
2429
 
2430
  initialize_original_copy_tables ();
2431
 
2432
  /* Generate the following variables on the preheader of original loop:
2433
 
2434
     ni_name = number of iteration the original loop executes
2435
     ratio = ni_name / vf
2436
     ratio_mult_vf_name = ratio * vf  */
2437
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2438
                                   &ratio_mult_vf_name, ratio);
2439
 
2440
  loop_num  = loop->num;
2441
  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2442
                                            ratio_mult_vf_name, ni_name, false);
2443
  gcc_assert (new_loop);
2444
  gcc_assert (loop_num == loop->num);
2445
#ifdef ENABLE_CHECKING
2446
  slpeel_verify_cfg_after_peeling (loop, new_loop);
2447
#endif
2448
 
2449
  /* A guard that controls whether the new_loop is to be executed or skipped
2450
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
2451
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
2452
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
2453
     is on the path where the LOOP IVs are used and need to be updated.  */
2454
 
2455
  preheader = loop_preheader_edge (new_loop)->src;
2456
  if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2457
    update_e = EDGE_PRED (preheader, 0);
2458
  else
2459
    update_e = EDGE_PRED (preheader, 1);
2460
 
2461
  /* Update IVs of original loop as if they were advanced
2462
     by ratio_mult_vf_name steps.  */
2463
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2464
 
2465
  /* After peeling we have to reset scalar evolution analyzer.  */
2466
  scev_reset ();
2467
 
2468
  free_original_copy_tables ();
2469
}
2470
 
2471
 
2472
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
   and the misalignment of DR - the data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_size - 1)

   prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )

   (elem_size = element type size; an element is the scalar element
        whose type is the inner type of the vectype)

   Returns the SSA name holding the prolog iteration count; any statements
   needed to compute it are inserted on the loop preheader edge.  */

static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* Misalignment is known at compile time; fold the whole computation
         down to a single integer constant.  */
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      int element_size = vectype_align/vf;
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
    }
  else
    {
      /* Misalignment is unknown; emit code that computes it from the
         runtime start address of the data reference.  */
      tree new_stmts = NULL_TREE;
      tree start_addr =
        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align/vf));
      tree vf_minus_1 = build_int_cst (type, vf - 1);
      tree vf_tree = build_int_cst (type, vf);
      tree byte_misalign;
      tree elem_misalign;

      /* The address computation must not have created a new basic block
         on the preheader edge.  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
      byte_misalign =
        build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size
         (element size is a power of two, so a right shift suffices).  */
      elem_misalign =
        build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
      iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
      iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
      iters = fold_convert (niters_type, iters);
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_tmp_var (var);
  /* STMT receives the statement list (possibly NULL) needed to gimplify
     ITERS; ITERS_NAME is the resulting operand.  */
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
2579
 
2580
 
2581
/* Function vect_update_init_of_dr
2582
 
2583
   NITERS iterations were peeled from LOOP.  DR represents a data reference
2584
   in LOOP.  This function updates the information recorded in DR to
2585
   account for the fact that the first NITERS iterations had already been
2586
   executed.  Specifically, it updates the OFFSET field of DR.  */
2587
 
2588
static void
2589
vect_update_init_of_dr (struct data_reference *dr, tree niters)
2590
{
2591
  tree offset = DR_OFFSET (dr);
2592
 
2593
  niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2594
  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2595
  DR_OFFSET (dr) = offset;
2596
}
2597
 
2598
 
2599
/* Function vect_update_inits_of_drs
2600
 
2601
   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2602
   This function updates the information recorded for the data references in
2603
   the loop to account for the fact that the first NITERS iterations had
2604
   already been executed.  Specifically, it updates the initial_condition of the
2605
   access_function of all the data_references in the loop.  */
2606
 
2607
static void
2608
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2609
{
2610
  unsigned int i;
2611
  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2612
 
2613
  if (vect_dump && (dump_flags & TDF_DETAILS))
2614
    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2615
 
2616
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
2617
    {
2618
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
2619
      vect_update_init_of_dr (dr, niters);
2620
    }
2621
}
2622
 
2623
 
2624
/* Function vect_do_peeling_for_alignment
2625
 
2626
   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2627
   'niters' is set to the misalignment of one of the data references in the
2628
   loop, thereby forcing it to refer to an aligned location at the beginning
2629
   of the execution of this loop.  The data reference for which we are
2630
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2631
 
2632
static void
2633
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2634
{
2635
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2636
  tree niters_of_prolog_loop, ni_name;
2637
  tree n_iters;
2638
  struct loop *new_loop;
2639
 
2640
  if (vect_print_dump_info (REPORT_DETAILS))
2641
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2642
 
2643
  initialize_original_copy_tables ();
2644
 
2645
  ni_name = vect_build_loop_niters (loop_vinfo);
2646
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2647
 
2648
  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2649
  new_loop =
2650
        slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2651
                                       niters_of_prolog_loop, ni_name, true);
2652
  gcc_assert (new_loop);
2653
#ifdef ENABLE_CHECKING
2654
  slpeel_verify_cfg_after_peeling (new_loop, loop);
2655
#endif
2656
 
2657
  /* Update number of times loop executes.  */
2658
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2659
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2660
                TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2661
 
2662
  /* Update the init conditions of the access functions of all data refs.  */
2663
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2664
 
2665
  /* After peeling we have to reset scalar evolution analyzer.  */
2666
  scev_reset ();
2667
 
2668
  free_original_copy_tables ();
2669
}
2670
 
2671
 
2672
/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of data references (array element references) whose alignment must be
   checked at runtime.

   Input:
   LOOP_VINFO - two fields of the loop information are used.
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero and that this
        test can be done as a&(n-1) == 0.  For example, for 16
        byte vectors the test is a&0xf == 0.  */

static tree
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr_stmt_list)
{
  VEC(tree,heap) *may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  tree ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree psize;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp, and_tmp_name, and_stmt;
  tree ptrsize_zero;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));

  /* CHECKME: what is the best integer or unsigned type to use to hold a
     cast from a pointer value?  */
  psize = TYPE_SIZE (ptr_type_node);
  int_ptrsize_type
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);

  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
     of the first vector of the i'th data reference. */

  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
    {
      tree new_stmt_list = NULL_TREE;
      tree addr_base;
      tree addr_tmp, addr_tmp_name, addr_stmt;
      tree or_tmp, new_or_tmp_name, or_stmt;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
                                                        &new_stmt_list,
                                                        NULL_TREE);

      if (new_stmt_list != NULL_TREE)
        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);

      /* NOTE(review): I is unsigned but printed with %d; "%u" would match
         exactly.  Harmless in practice for small counts — confirm.  */
      sprintf (tmp_name, "%s%d", "addr2int", i);
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
      add_referenced_tmp_var (addr_tmp);
      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
      addr_stmt = build2 (MODIFY_EXPR, void_type_node,
                          addr_tmp_name, addr_stmt);
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);

      /* The addresses are OR'ed together so a single mask test can detect
         any misaligned one.  */

      if (or_tmp_name != NULL_TREE)
        {
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "%s%d", "orptrs", i);
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
          add_referenced_tmp_var (or_tmp);
          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
          or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
                            build2 (BIT_IOR_EXPR, int_ptrsize_type,
                                    or_tmp_name,
                                    addr_tmp_name));
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
          or_tmp_name = new_or_tmp_name;
        }
      else
        /* First address: nothing to OR with yet.  */
        or_tmp_name = addr_tmp_name;

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
  add_referenced_tmp_var (and_tmp);
  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);

  and_stmt = build2 (MODIFY_EXPR, void_type_node,
                     and_tmp_name,
                     build2 (BIT_AND_EXPR, int_ptrsize_type,
                             or_tmp_name, mask_cst));
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);

  /* Make and_tmp the left operand of the conditional test against zero.
     if and_tmp has a non-zero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  return build2 (EQ_EXPR, boolean_type_node,
                 and_tmp_name, ptrsize_zero);
}
2792
 
2793
 
2794
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo,
                     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bitmap_iterator bi;
  unsigned int j;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* If the loop has data references that may or may not be aligned then
     two versions of the loop need to be generated, one which is vectorized
     and one which isn't.  A test is then generated to control which of the
     loops is executed.  The test checks for the alignment of all of the
     data references that may or may not be aligned. */

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      struct loop *nloop;
      tree cond_expr;
      tree cond_expr_stmt_list = NULL_TREE;
      basic_block condition_bb;
      block_stmt_iterator cond_exp_bsi;
      basic_block merge_bb;
      basic_block new_exit_bb;
      edge new_exit_e, e;
      tree orig_phi, new_phi, arg;

      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                     &cond_expr_stmt_list);
      initialize_original_copy_tables ();
      nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
      free_original_copy_tables();

      /** Loop versioning violates an assumption we try to maintain during
         vectorization - that the loop exit block has a single predecessor.
         After versioning, the exit block of both loop versions is the same
         basic block (i.e. it has two predecessors). Just in order to simplify
         following transformations in the vectorizer, we fix this situation
         here by adding a new (empty) block on the exit-edge of the loop,
         with the proper loop-exit phis to maintain loop-closed-form.  **/

      merge_bb = loop->single_exit->dest;
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
      new_exit_bb = split_edge (loop->single_exit);
      add_bb_to_loop (new_exit_bb, loop->outer);
      new_exit_e = loop->single_exit;
      e = EDGE_SUCC (new_exit_bb, 0);

      /* Re-create the loop-closed-form phis: each merge-bb phi gets a new
         intermediate phi in NEW_EXIT_BB that carries the loop's value.  */
      for (orig_phi = phi_nodes (merge_bb); orig_phi;
           orig_phi = PHI_CHAIN (orig_phi))
        {
          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                     new_exit_bb);
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          add_phi_arg (new_phi, arg, new_exit_e);
          SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
        }

      /** end loop-exit-fixes after versioning  **/

      update_ssa (TODO_update_ssa);
      cond_exp_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
    }

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_vnames_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations. The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
  else
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* Note: the iterator is advanced manually because vect_transform_stmt
         may remove the current stmt (stores) or insert stmts after it.  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
        {
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info;
          bool is_store;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          stmt_info = vinfo_for_stmt (stmt);
          gcc_assert (stmt_info);
          /* Skip stmts that analysis marked as neither relevant nor live.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              bsi_next (&si);
              continue;
            }
          /* FORNOW: Verify that all stmts operate on the same number of
                     units and no inner unrolling is necessary.  */
          gcc_assert
                (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
                 == (unsigned HOST_WIDE_INT) vectorization_factor);

          /* -------- vectorize statement ------------ */
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "transform statement.");

          is_store = vect_transform_stmt (stmt, &si);
          if (is_store)
            {
              /* Free the attached stmt_vec_info and remove the stmt.  */
              stmt_ann_t ann = stmt_ann (stmt);
              free (stmt_info);
              set_stmt_info ((tree_ann_t)ann, NULL);
              bsi_remove (&si);
              continue;
            }

          bsi_next (&si);
        }                       /* stmts in BB */
    }                           /* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.