OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.2.2/] [gcc/] [tree-vect-transform.c] - Blame information for rev 199

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Transformation Utilities for Loop Vectorization.
2
   Copyright (C) 2003,2004,2005,2006, 2007 Free Software Foundation, Inc.
3
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it under
8
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 3, or (at your option) any later
10
version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13
WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING3.  If not see
19
<http://www.gnu.org/licenses/>.  */
20
 
21
#include "config.h"
22
#include "system.h"
23
#include "coretypes.h"
24
#include "tm.h"
25
#include "ggc.h"
26
#include "tree.h"
27
#include "target.h"
28
#include "rtl.h"
29
#include "basic-block.h"
30
#include "diagnostic.h"
31
#include "tree-flow.h"
32
#include "tree-dump.h"
33
#include "timevar.h"
34
#include "cfgloop.h"
35
#include "expr.h"
36
#include "optabs.h"
37
#include "recog.h"
38
#include "tree-data-ref.h"
39
#include "tree-chrec.h"
40
#include "tree-scalar-evolution.h"
41
#include "tree-vectorizer.h"
42
#include "langhooks.h"
43
#include "tree-pass.h"
44
#include "toplev.h"
45
#include "real.h"
46
 
47
/* Utility functions for the code transformation.  */
48
static bool vect_transform_stmt (tree, block_stmt_iterator *);
49
static void vect_align_data_ref (tree);
50
static tree vect_create_destination_var (tree, tree);
51
static tree vect_create_data_ref_ptr
52
  (tree, block_stmt_iterator *, tree, tree *, bool);
53
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
54
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
55
static tree vect_get_vec_def_for_operand (tree, tree, tree *);
56
static tree vect_init_vector (tree, tree);
57
static void vect_finish_stmt_generation
58
  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
59
static bool vect_is_simple_cond (tree, loop_vec_info);
60
static void update_vuses_to_preheader (tree, struct loop*);
61
static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
62
static tree get_initial_def_for_reduction (tree, tree, tree *);
63
 
64
/* Utility function dealing with loop peeling (not peeling itself).  */
65
static void vect_generate_tmps_on_preheader
66
  (loop_vec_info, tree *, tree *, tree *);
67
static tree vect_build_loop_niters (loop_vec_info);
68
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70
static void vect_update_init_of_dr (struct data_reference *, tree niters);
71
static void vect_update_inits_of_drs (loop_vec_info, tree);
72
static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
73
static void vect_do_peeling_for_loop_bound
74
  (loop_vec_info, tree *, struct loops *);
75
static int vect_min_worthwhile_factor (enum tree_code);
76
 
77
 
78
/* Function vect_get_new_vect_var.
79
 
80
   Returns a name for a new variable. The current naming scheme appends the
81
   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
82
   the name of vectorizer generated variables, and appends that to NAME if
83
   provided.  */
84
 
85
static tree
86
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
87
{
88
  const char *prefix;
89
  tree new_vect_var;
90
 
91
  switch (var_kind)
92
  {
93
  case vect_simple_var:
94
    prefix = "vect_";
95
    break;
96
  case vect_scalar_var:
97
    prefix = "stmp_";
98
    break;
99
  case vect_pointer_var:
100
    prefix = "vect_p";
101
    break;
102
  default:
103
    gcc_unreachable ();
104
  }
105
 
106
  if (name)
107
    new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
108
  else
109
    new_vect_var = create_tmp_var (type, prefix);
110
 
111
  return new_vect_var;
112
}
113
 
114
 
115
/* Function vect_create_addr_base_for_vector_ref.
116
 
117
   Create an expression that computes the address of the first memory location
118
   that will be accessed for a data reference.
119
 
120
   Input:
121
   STMT: The statement containing the data reference.
122
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
123
   OFFSET: Optional. If supplied, it is be added to the initial address.
124
 
125
   Output:
126
   1. Return an SSA_NAME whose value is the address of the memory location of
127
      the first vector of the data reference.
128
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
129
      these statement(s) which define the returned SSA_NAME.
130
 
131
   FORNOW: We are only handling array accesses with step 1.  */
132
 
133
static tree
134
vect_create_addr_base_for_vector_ref (tree stmt,
135
                                      tree *new_stmt_list,
136
                                      tree offset)
137
{
138
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
139
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
140
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
141
  tree base_name = build_fold_indirect_ref (data_ref_base);
142
  tree ref = DR_REF (dr);
143
  tree scalar_type = TREE_TYPE (ref);
144
  tree scalar_ptr_type = build_pointer_type (scalar_type);
145
  tree vec_stmt;
146
  tree new_temp;
147
  tree addr_base, addr_expr;
148
  tree dest, new_stmt;
149
  tree base_offset = unshare_expr (DR_OFFSET (dr));
150
  tree init = unshare_expr (DR_INIT (dr));
151
 
152
  /* Create base_offset */
153
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
154
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
155
  add_referenced_var (dest);
156
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
157
  append_to_statement_list_force (new_stmt, new_stmt_list);
158
 
159
  if (offset)
160
    {
161
      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
162
      add_referenced_var (tmp);
163
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
164
                            DR_STEP (dr));
165
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
166
                                 base_offset, offset);
167
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
168
      append_to_statement_list_force (new_stmt, new_stmt_list);
169
    }
170
 
171
  /* base + base_offset */
172
  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
173
                           base_offset);
174
 
175
  /* addr_expr = addr_base */
176
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
177
                                     get_name (base_name));
178
  add_referenced_var (addr_expr);
179
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
180
  new_temp = make_ssa_name (addr_expr, vec_stmt);
181
  TREE_OPERAND (vec_stmt, 0) = new_temp;
182
  append_to_statement_list_force (vec_stmt, new_stmt_list);
183
 
184
  if (vect_print_dump_info (REPORT_DETAILS))
185
    {
186
      fprintf (vect_dump, "created ");
187
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
188
    }
189
  return new_temp;
190
}
191
 
192
 
193
/* Function vect_align_data_ref.
194
 
195
   Handle misalignment of a memory accesses.
196
 
197
   FORNOW: Can't handle misaligned accesses.
198
   Make sure that the dataref is aligned.  */
199
 
200
static void
201
vect_align_data_ref (tree stmt)
202
{
203
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
204
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
205
 
206
  /* FORNOW: can't handle misaligned accesses;
207
             all accesses expected to be aligned.  */
208
  gcc_assert (aligned_access_p (dr));
209
}
210
 
211
 
212
/* Function vect_create_data_ref_ptr.
213
 
214
   Create a memory reference expression for vector access, to be used in a
215
   vector load/store stmt. The reference is based on a new pointer to vector
216
   type (vp).
217
 
218
   Input:
219
   1. STMT: a stmt that references memory. Expected to be of the form
220
         MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
221
   2. BSI: block_stmt_iterator where new stmts can be added.
222
   3. OFFSET (optional): an offset to be added to the initial address accessed
223
        by the data-ref in STMT.
224
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
225
        pointing to the initial address.
226
 
227
   Output:
228
   1. Declare a new ptr to vector_type, and have it point to the base of the
229
      data reference (initial addressed accessed by the data reference).
230
      For example, for vector of type V8HI, the following code is generated:
231
 
232
      v8hi *vp;
233
      vp = (v8hi *)initial_address;
234
 
235
      if OFFSET is not supplied:
236
         initial_address = &a[init];
237
      if OFFSET is supplied:
238
         initial_address = &a[init + OFFSET];
239
 
240
      Return the initial_address in INITIAL_ADDRESS.
241
 
242
   2. If ONLY_INIT is true, return the initial pointer.  Otherwise, create
243
      a data-reference in the loop based on the new vector pointer vp.  This
244
      new data reference will by some means be updated each iteration of
245
      the loop.  Return the pointer vp'.
246
 
247
   FORNOW: handle only aligned and consecutive accesses.  */
248
 
249
static tree
250
vect_create_data_ref_ptr (tree stmt,
251
                          block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
252
                          tree offset, tree *initial_address, bool only_init)
253
{
254
  tree base_name;
255
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
256
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
257
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
258
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
259
  tree vect_ptr_type;
260
  tree vect_ptr;
261
  tree tag;
262
  tree new_temp;
263
  tree vec_stmt;
264
  tree new_stmt_list = NULL_TREE;
265
  edge pe = loop_preheader_edge (loop);
266
  basic_block new_bb;
267
  tree vect_ptr_init;
268
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
269
 
270
  base_name =  build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
271
 
272
  if (vect_print_dump_info (REPORT_DETAILS))
273
    {
274
      tree data_ref_base = base_name;
275
      fprintf (vect_dump, "create vector-pointer variable to type: ");
276
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
277
      if (TREE_CODE (data_ref_base) == VAR_DECL)
278
        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
279
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
280
        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
281
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
282
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
283
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
284
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
285
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
286
    }
287
 
288
  /** (1) Create the new vector-pointer variable:  **/
289
 
290
  vect_ptr_type = build_pointer_type (vectype);
291
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
292
                                    get_name (base_name));
293
  add_referenced_var (vect_ptr);
294
 
295
 
296
  /** (2) Add aliasing information to the new vector-pointer:
297
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/
298
 
299
  tag = DR_MEMTAG (dr);
300
  gcc_assert (tag);
301
 
302
  /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
303
     tag must be created with tag added to its may alias list.  */
304
  if (!MTAG_P (tag))
305
    new_type_alias (vect_ptr, tag, DR_REF (dr));
306
  else
307
    var_ann (vect_ptr)->symbol_mem_tag = tag;
308
 
309
  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
310
 
311
  /** (3) Calculate the initial address the vector-pointer, and set
312
          the vector-pointer to point to it before the loop:  **/
313
 
314
  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
315
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
316
                                                   offset);
317
  pe = loop_preheader_edge (loop);
318
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
319
  gcc_assert (!new_bb);
320
  *initial_address = new_temp;
321
 
322
  /* Create: p = (vectype *) initial_base  */
323
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
324
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
325
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
326
  TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
327
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
328
  gcc_assert (!new_bb);
329
 
330
 
331
  /** (4) Handle the updating of the vector-pointer inside the loop: **/
332
 
333
  if (only_init) /* No update in loop is required.  */
334
    {
335
      /* Copy the points-to information if it exists. */
336
      if (DR_PTR_INFO (dr))
337
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
338
      return vect_ptr_init;
339
    }
340
  else
341
    {
342
      block_stmt_iterator incr_bsi;
343
      bool insert_after;
344
      tree indx_before_incr, indx_after_incr;
345
      tree incr;
346
 
347
      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
348
      create_iv (vect_ptr_init,
349
                 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
350
                 NULL_TREE, loop, &incr_bsi, insert_after,
351
                 &indx_before_incr, &indx_after_incr);
352
      incr = bsi_stmt (incr_bsi);
353
      set_stmt_info (stmt_ann (incr),
354
                     new_stmt_vec_info (incr, loop_vinfo));
355
 
356
      /* Copy the points-to information if it exists. */
357
      if (DR_PTR_INFO (dr))
358
        {
359
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
360
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
361
        }
362
      merge_alias_info (vect_ptr_init, indx_before_incr);
363
      merge_alias_info (vect_ptr_init, indx_after_incr);
364
 
365
      return indx_before_incr;
366
    }
367
}
368
 
369
 
370
/* Function vect_create_destination_var.
371
 
372
   Create a new temporary of type VECTYPE.  */
373
 
374
static tree
375
vect_create_destination_var (tree scalar_dest, tree vectype)
376
{
377
  tree vec_dest;
378
  const char *new_name;
379
  tree type;
380
  enum vect_var_kind kind;
381
 
382
  kind = vectype ? vect_simple_var : vect_scalar_var;
383
  type = vectype ? vectype : TREE_TYPE (scalar_dest);
384
 
385
  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
386
 
387
  new_name = get_name (scalar_dest);
388
  if (!new_name)
389
    new_name = "var_";
390
  vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
391
  add_referenced_var (vec_dest);
392
 
393
  return vec_dest;
394
}
395
 
396
 
397
/* Function vect_init_vector.
398
 
399
   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
400
   the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
401
   used in the vectorization of STMT.  */
402
 
403
static tree
404
vect_init_vector (tree stmt, tree vector_var)
405
{
406
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
407
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
408
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
409
  tree new_var;
410
  tree init_stmt;
411
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
412
  tree vec_oprnd;
413
  edge pe;
414
  tree new_temp;
415
  basic_block new_bb;
416
 
417
  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
418
  add_referenced_var (new_var);
419
 
420
  init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
421
  new_temp = make_ssa_name (new_var, init_stmt);
422
  TREE_OPERAND (init_stmt, 0) = new_temp;
423
 
424
  pe = loop_preheader_edge (loop);
425
  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
426
  gcc_assert (!new_bb);
427
 
428
  if (vect_print_dump_info (REPORT_DETAILS))
429
    {
430
      fprintf (vect_dump, "created new init_stmt: ");
431
      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
432
    }
433
 
434
  vec_oprnd = TREE_OPERAND (init_stmt, 0);
435
  return vec_oprnd;
436
}
437
 
438
 
439
/* Function vect_get_vec_def_for_operand.
440
 
441
   OP is an operand in STMT. This function returns a (vector) def that will be
442
   used in the vectorized stmt for STMT.
443
 
444
   In the case that OP is an SSA_NAME which is defined in the loop, then
445
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
446
 
447
   In case OP is an invariant or constant, a new stmt that creates a vector def
448
   needs to be introduced.  */
449
 
450
static tree
451
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
452
{
453
  tree vec_oprnd;
454
  tree vec_stmt;
455
  tree def_stmt;
456
  stmt_vec_info def_stmt_info = NULL;
457
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
458
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
459
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
460
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
461
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
462
  tree vec_inv;
463
  tree vec_cst;
464
  tree t = NULL_TREE;
465
  tree def;
466
  int i;
467
  enum vect_def_type dt;
468
  bool is_simple_use;
469
 
470
  if (vect_print_dump_info (REPORT_DETAILS))
471
    {
472
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
473
      print_generic_expr (vect_dump, op, TDF_SLIM);
474
    }
475
 
476
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
477
  gcc_assert (is_simple_use);
478
  if (vect_print_dump_info (REPORT_DETAILS))
479
    {
480
      if (def)
481
        {
482
          fprintf (vect_dump, "def =  ");
483
          print_generic_expr (vect_dump, def, TDF_SLIM);
484
        }
485
      if (def_stmt)
486
        {
487
          fprintf (vect_dump, "  def_stmt =  ");
488
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
489
        }
490
    }
491
 
492
  switch (dt)
493
    {
494
    /* Case 1: operand is a constant.  */
495
    case vect_constant_def:
496
      {
497
        if (scalar_def)
498
          *scalar_def = op;
499
 
500
        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
501
        if (vect_print_dump_info (REPORT_DETAILS))
502
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
503
 
504
        for (i = nunits - 1; i >= 0; --i)
505
          {
506
            t = tree_cons (NULL_TREE, op, t);
507
          }
508
        vec_cst = build_vector (vectype, t);
509
        return vect_init_vector (stmt, vec_cst);
510
      }
511
 
512
    /* Case 2: operand is defined outside the loop - loop invariant.  */
513
    case vect_invariant_def:
514
      {
515
        if (scalar_def)
516
          *scalar_def = def;
517
 
518
        /* Create 'vec_inv = {inv,inv,..,inv}'  */
519
        if (vect_print_dump_info (REPORT_DETAILS))
520
          fprintf (vect_dump, "Create vector_inv.");
521
 
522
        for (i = nunits - 1; i >= 0; --i)
523
          {
524
            t = tree_cons (NULL_TREE, def, t);
525
          }
526
 
527
        /* FIXME: use build_constructor directly.  */
528
        vec_inv = build_constructor_from_list (vectype, t);
529
        return vect_init_vector (stmt, vec_inv);
530
      }
531
 
532
    /* Case 3: operand is defined inside the loop.  */
533
    case vect_loop_def:
534
      {
535
        if (scalar_def)
536
          *scalar_def = def_stmt;
537
 
538
        /* Get the def from the vectorized stmt.  */
539
        def_stmt_info = vinfo_for_stmt (def_stmt);
540
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
541
        gcc_assert (vec_stmt);
542
        vec_oprnd = TREE_OPERAND (vec_stmt, 0);
543
        return vec_oprnd;
544
      }
545
 
546
    /* Case 4: operand is defined by a loop header phi - reduction  */
547
    case vect_reduction_def:
548
      {
549
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
550
 
551
        /* Get the def before the loop  */
552
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
553
        return get_initial_def_for_reduction (stmt, op, scalar_def);
554
     }
555
 
556
    /* Case 5: operand is defined by loop-header phi - induction.  */
557
    case vect_induction_def:
558
      {
559
        if (vect_print_dump_info (REPORT_DETAILS))
560
          fprintf (vect_dump, "induction - unsupported.");
561
        internal_error ("no support for induction"); /* FORNOW */
562
      }
563
 
564
    default:
565
      gcc_unreachable ();
566
    }
567
}
568
 
569
 
570
/* Function vect_finish_stmt_generation.
571
 
572
   Insert a new stmt.  */
573
 
574
static void
575
vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
576
{
577
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
578
 
579
  if (vect_print_dump_info (REPORT_DETAILS))
580
    {
581
      fprintf (vect_dump, "add new stmt: ");
582
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
583
    }
584
 
585
  /* Make sure bsi points to the stmt that is being vectorized.  */
586
  gcc_assert (stmt == bsi_stmt (*bsi));
587
 
588
#ifdef USE_MAPPED_LOCATION
589
  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
590
#else
591
  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
592
#endif
593
}
594
 
595
 
596
#define ADJUST_IN_EPILOG 1
597
 
598
/* Function get_initial_def_for_reduction
599
 
600
   Input:
601
   STMT - a stmt that performs a reduction operation in the loop.
602
   INIT_VAL - the initial value of the reduction variable
603
 
604
   Output:
605
   SCALAR_DEF - a tree that holds a value to be added to the final result
606
        of the reduction (used for "ADJUST_IN_EPILOG" - see below).
607
   Return a vector variable, initialized according to the operation that STMT
608
        performs. This vector will be used as the initial value of the
609
        vector of partial results.
610
 
611
   Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
612
     add:         [0,0,...,0,0]
613
     mult:        [1,1,...,1,1]
614
     min/max:     [init_val,init_val,..,init_val,init_val]
615
     bit and/or:  [init_val,init_val,..,init_val,init_val]
616
   and when necessary (e.g. add/mult case) let the caller know
617
   that it needs to adjust the result by init_val.
618
 
619
   Option2: Initialize the vector as follows:
620
     add:         [0,0,...,0,init_val]
621
     mult:        [1,1,...,1,init_val]
622
     min/max:     [init_val,init_val,...,init_val]
623
     bit and/or:  [init_val,init_val,...,init_val]
624
   and no adjustments are needed.
625
 
626
   For example, for the following code:
627
 
628
   s = init_val;
629
   for (i=0;i<n;i++)
630
     s = s + a[i];
631
 
632
   STMT is 's = s + a[i]', and the reduction variable is 's'.
633
   For a vector of 4 units, we want to return either [0,0,0,init_val],
634
   or [0,0,0,0] and let the caller know that it needs to adjust
635
   the result at the end by 'init_val'.
636
 
637
   FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
638
   TODO: Use some cost-model to estimate which scheme is more profitable.
639
*/
640
 
641
static tree
642
get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
643
{
644
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
645
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
646
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
647
  int nelements;
648
  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
649
  tree type = TREE_TYPE (init_val);
650
  tree def;
651
  tree vec, t = NULL_TREE;
652
  bool need_epilog_adjust;
653
  int i;
654
 
655
  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
656
 
657
  switch (code)
658
  {
659
  case WIDEN_SUM_EXPR:
660
  case DOT_PROD_EXPR:
661
  case PLUS_EXPR:
662
    if (INTEGRAL_TYPE_P (type))
663
      def = build_int_cst (type, 0);
664
    else
665
      def = build_real (type, dconst0);
666
 
667
#ifdef ADJUST_IN_EPILOG
668
    /* All the 'nunits' elements are set to 0. The final result will be
669
       adjusted by 'init_val' at the loop epilog.  */
670
    nelements = nunits;
671
    need_epilog_adjust = true;
672
#else
673
    /* 'nunits - 1' elements are set to 0; The last element is set to
674
        'init_val'.  No further adjustments at the epilog are needed.  */
675
    nelements = nunits - 1;
676
    need_epilog_adjust = false;
677
#endif
678
    break;
679
 
680
  case MIN_EXPR:
681
  case MAX_EXPR:
682
    def = init_val;
683
    nelements = nunits;
684
    need_epilog_adjust = false;
685
    break;
686
 
687
  default:
688
    gcc_unreachable ();
689
  }
690
 
691
  for (i = nelements - 1; i >= 0; --i)
692
    t = tree_cons (NULL_TREE, def, t);
693
 
694
  if (nelements == nunits - 1)
695
    {
696
      /* Set the last element of the vector.  */
697
      t = tree_cons (NULL_TREE, init_val, t);
698
      nelements += 1;
699
    }
700
  gcc_assert (nelements == nunits);
701
 
702
  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
703
    vec = build_vector (vectype, t);
704
  else
705
    vec = build_constructor_from_list (vectype, t);
706
 
707
  if (!need_epilog_adjust)
708
    *scalar_def = NULL_TREE;
709
  else
710
    *scalar_def = init_val;
711
 
712
  return vect_init_vector (stmt, vec);
713
}
714
 
715
 
716
/* Function vect_create_epilog_for_reduction
717
 
718
   Create code at the loop-epilog to finalize the result of a reduction
719
   computation.
720
 
721
   VECT_DEF is a vector of partial results.
722
   REDUC_CODE is the tree-code for the epilog reduction.
723
   STMT is the scalar reduction stmt that is being vectorized.
724
   REDUCTION_PHI is the phi-node that carries the reduction computation.
725
 
726
   This function:
727
   1. Creates the reduction def-use cycle: sets the the arguments for
728
      REDUCTION_PHI:
729
      The loop-entry argument is the vectorized initial-value of the reduction.
730
      The loop-latch argument is VECT_DEF - the vector of partial sums.
731
   2. "Reduces" the vector of partial results VECT_DEF into a single result,
732
      by applying the operation specified by REDUC_CODE if available, or by
733
      other means (whole-vector shifts or a scalar loop).
734
      The function also creates a new phi node at the loop exit to preserve
735
      loop-closed form, as illustrated below.
736
 
737
     The flow at the entry to this function:
738
 
739
        loop:
740
          vec_def = phi <null, null>            # REDUCTION_PHI
741
          VECT_DEF = vector_stmt                # vectorized form of STMT
742
          s_loop = scalar_stmt                  # (scalar) STMT
743
        loop_exit:
744
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
745
          use <s_out0>
746
          use <s_out0>
747
 
748
     The above is transformed by this function into:
749
 
750
        loop:
751
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
752
          VECT_DEF = vector_stmt                # vectorized form of STMT
753
          s_loop = scalar_stmt                  # (scalar) STMT
754
        loop_exit:
755
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
756
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
757
          v_out2 = reduce <v_out1>
758
          s_out3 = extract_field <v_out2, 0>
759
          s_out4 = adjust_result <s_out3>
760
          use <s_out4>
761
          use <s_out4>
762
*/
763
 
764
static void
765
vect_create_epilog_for_reduction (tree vect_def, tree stmt,
766
                                  enum tree_code reduc_code, tree reduction_phi)
767
{
768
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
769
  tree vectype;
770
  enum machine_mode mode;
771
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
772
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
773
  basic_block exit_bb;
774
  tree scalar_dest;
775
  tree scalar_type;
776
  tree new_phi;
777
  block_stmt_iterator exit_bsi;
778
  tree vec_dest;
779
  tree new_temp;
780
  tree new_name;
781
  tree epilog_stmt;
782
  tree new_scalar_dest, exit_phi;
783
  tree bitsize, bitpos, bytesize;
784
  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
785
  tree scalar_initial_def;
786
  tree vec_initial_def;
787
  tree orig_name;
788
  imm_use_iterator imm_iter;
789
  use_operand_p use_p;
790
  bool extract_scalar_result;
791
  tree reduction_op;
792
  tree orig_stmt;
793
  tree use_stmt;
794
  tree operation = TREE_OPERAND (stmt, 1);
795
  int op_type;
796
 
797
  op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
798
  reduction_op = TREE_OPERAND (operation, op_type-1);
799
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
800
  mode = TYPE_MODE (vectype);
801
 
802
  /*** 1. Create the reduction def-use cycle  ***/
803
 
804
  /* 1.1 set the loop-entry arg of the reduction-phi:  */
805
  /* For the case of reduction, vect_get_vec_def_for_operand returns
806
     the scalar def before the loop, that defines the initial value
807
     of the reduction variable.  */
808
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
809
                                                  &scalar_initial_def);
810
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
811
 
812
  /* 1.2 set the loop-latch arg for the reduction-phi:  */
813
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
814
 
815
  if (vect_print_dump_info (REPORT_DETAILS))
816
    {
817
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
818
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
819
      fprintf (vect_dump, "\n");
820
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
821
    }
822
 
823
 
824
  /*** 2. Create epilog code
825
          The reduction epilog code operates across the elements of the vector
826
          of partial results computed by the vectorized loop.
827
          The reduction epilog code consists of:
828
          step 1: compute the scalar result in a vector (v_out2)
829
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
830
          step 3: adjust the scalar result (s_out3) if needed.
831
 
832
          Step 1 can be accomplished using one the following three schemes:
833
          (scheme 1) using reduc_code, if available.
834
          (scheme 2) using whole-vector shifts, if available.
835
          (scheme 3) using a scalar loop. In this case steps 1+2 above are
836
                     combined.
837
 
838
          The overall epilog code looks like this:
839
 
840
          s_out0 = phi <s_loop>         # original EXIT_PHI
841
          v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
842
          v_out2 = reduce <v_out1>              # step 1
843
          s_out3 = extract_field <v_out2, 0>    # step 2
844
          s_out4 = adjust_result <s_out3>       # step 3
845
 
846
          (step 3 is optional, and step2 1 and 2 may be combined).
847
          Lastly, the uses of s_out0 are replaced by s_out4.
848
 
849
          ***/
850
 
851
  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
852
        v_out1 = phi <v_loop>  */
853
 
854
  exit_bb = loop->single_exit->dest;
855
  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
856
  SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
857
  exit_bsi = bsi_start (exit_bb);
858
 
859
  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
860
         (i.e. when reduc_code is not available) and in the final adjustment code
861
         (if needed).  Also get the original scalar reduction variable as
862
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
863
         represents a reduction pattern), the tree-code and scalar-def are
864
         taken from the original stmt that the pattern-stmt (STMT) replaces.
865
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
866
         are taken from STMT.  */
867
 
868
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
869
  if (!orig_stmt)
870
    {
871
      /* Regular reduction  */
872
      orig_stmt = stmt;
873
    }
874
  else
875
    {
876
      /* Reduction pattern  */
877
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
878
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
879
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
880
    }
881
  code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
882
  scalar_dest = TREE_OPERAND (orig_stmt, 0);
883
  scalar_type = TREE_TYPE (scalar_dest);
884
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
885
  bitsize = TYPE_SIZE (scalar_type);
886
  bytesize = TYPE_SIZE_UNIT (scalar_type);
887
 
888
  /* 2.3 Create the reduction code, using one of the three schemes described
889
         above.  */
890
 
891
  if (reduc_code < NUM_TREE_CODES)
892
    {
893
      /*** Case 1:  Create:
894
           v_out2 = reduc_expr <v_out1>  */
895
 
896
      if (vect_print_dump_info (REPORT_DETAILS))
897
        fprintf (vect_dump, "Reduce using direct vector reduction.");
898
 
899
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
900
      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
901
                        build1 (reduc_code, vectype,  PHI_RESULT (new_phi)));
902
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
903
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
904
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
905
 
906
      extract_scalar_result = true;
907
    }
908
  else
909
    {
910
      enum tree_code shift_code = 0;
911
      bool have_whole_vector_shift = true;
912
      int bit_offset;
913
      int element_bitsize = tree_low_cst (bitsize, 1);
914
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
915
      tree vec_temp;
916
 
917
      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
918
        shift_code = VEC_RSHIFT_EXPR;
919
      else
920
        have_whole_vector_shift = false;
921
 
922
      /* Regardless of whether we have a whole vector shift, if we're
923
         emulating the operation via tree-vect-generic, we don't want
924
         to use it.  Only the first round of the reduction is likely
925
         to still be profitable via emulation.  */
926
      /* ??? It might be better to emit a reduction tree code here, so that
927
         tree-vect-generic can expand the first round via bit tricks.  */
928
      if (!VECTOR_MODE_P (mode))
929
        have_whole_vector_shift = false;
930
      else
931
        {
932
          optab optab = optab_for_tree_code (code, vectype);
933
          if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
934
            have_whole_vector_shift = false;
935
        }
936
 
937
      if (have_whole_vector_shift)
938
        {
939
          /*** Case 2: Create:
940
             for (offset = VS/2; offset >= element_size; offset/=2)
941
                {
942
                  Create:  va' = vec_shift <va, offset>
943
                  Create:  va = vop <va, va'>
944
                }  */
945
 
946
          if (vect_print_dump_info (REPORT_DETAILS))
947
            fprintf (vect_dump, "Reduce using vector shifts");
948
 
949
          vec_dest = vect_create_destination_var (scalar_dest, vectype);
950
          new_temp = PHI_RESULT (new_phi);
951
 
952
          for (bit_offset = vec_size_in_bits/2;
953
               bit_offset >= element_bitsize;
954
               bit_offset /= 2)
955
            {
956
              tree bitpos = size_int (bit_offset);
957
 
958
              epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
959
              build2 (shift_code, vectype, new_temp, bitpos));
960
              new_name = make_ssa_name (vec_dest, epilog_stmt);
961
              TREE_OPERAND (epilog_stmt, 0) = new_name;
962
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
963
 
964
              epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
965
              build2 (code, vectype, new_name, new_temp));
966
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
967
              TREE_OPERAND (epilog_stmt, 0) = new_temp;
968
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
969
            }
970
 
971
          extract_scalar_result = true;
972
        }
973
      else
974
        {
975
          tree rhs;
976
 
977
          /*** Case 3: Create:
978
             s = extract_field <v_out2, 0>
979
             for (offset = element_size;
980
                  offset < vector_size;
981
                  offset += element_size;)
982
               {
983
                 Create:  s' = extract_field <v_out2, offset>
984
                 Create:  s = op <s, s'>
985
               }  */
986
 
987
          if (vect_print_dump_info (REPORT_DETAILS))
988
            fprintf (vect_dump, "Reduce using scalar code. ");
989
 
990
          vec_temp = PHI_RESULT (new_phi);
991
          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
992
          rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
993
                         bitsize_zero_node);
994
          BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
995
          epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
996
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
997
          TREE_OPERAND (epilog_stmt, 0) = new_temp;
998
          bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
999
 
1000
          for (bit_offset = element_bitsize;
1001
               bit_offset < vec_size_in_bits;
1002
               bit_offset += element_bitsize)
1003
            {
1004
              tree bitpos = bitsize_int (bit_offset);
1005
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1006
                                 bitpos);
1007
 
1008
              BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1009
              epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1010
                                    rhs);
1011
              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1012
              TREE_OPERAND (epilog_stmt, 0) = new_name;
1013
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1014
 
1015
              epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1016
                                build2 (code, scalar_type, new_name, new_temp));
1017
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1018
              TREE_OPERAND (epilog_stmt, 0) = new_temp;
1019
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1020
            }
1021
 
1022
          extract_scalar_result = false;
1023
        }
1024
    }
1025
 
1026
  /* 2.4  Extract the final scalar result.  Create:
1027
         s_out3 = extract_field <v_out2, bitpos>  */
1028
 
1029
  if (extract_scalar_result)
1030
    {
1031
      tree rhs;
1032
 
1033
      if (vect_print_dump_info (REPORT_DETAILS))
1034
        fprintf (vect_dump, "extract scalar result");
1035
 
1036
      if (BYTES_BIG_ENDIAN)
1037
        bitpos = size_binop (MULT_EXPR,
1038
                       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1039
                       TYPE_SIZE (scalar_type));
1040
      else
1041
        bitpos = bitsize_zero_node;
1042
 
1043
      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1044
      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1045
      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1046
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1047
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1048
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1049
    }
1050
 
1051
  /* 2.4 Adjust the final result by the initial value of the reduction
1052
         variable. (When such adjustment is not needed, then
1053
         'scalar_initial_def' is zero).
1054
 
1055
         Create:
1056
         s_out4 = scalar_expr <s_out3, scalar_initial_def>  */
1057
 
1058
  if (scalar_initial_def)
1059
    {
1060
      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1061
                      build2 (code, scalar_type, new_temp, scalar_initial_def));
1062
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1063
      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1064
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1065
    }
1066
 
1067
  /* 2.6 Replace uses of s_out0 with uses of s_out3  */
1068
 
1069
  /* Find the loop-closed-use at the loop exit of the original scalar result.
1070
     (The reduction result is expected to have two immediate uses - one at the
1071
     latch block, and one at the loop exit).  */
1072
  exit_phi = NULL;
1073
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1074
    {
1075
      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1076
        {
1077
          exit_phi = USE_STMT (use_p);
1078
          break;
1079
        }
1080
    }
1081
  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
1082
  gcc_assert (exit_phi);
1083
  /* Replace the uses:  */
1084
  orig_name = PHI_RESULT (exit_phi);
1085
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1086
    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1087
      SET_USE (use_p, new_temp);
1088
}
1089
 
1090
 
1091
/* Function vectorizable_reduction.
1092
 
1093
   Check if STMT performs a reduction operation that can be vectorized.
1094
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1095
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1096
   Return FALSE if not a vectorizable STMT, TRUE otherwise.
1097
 
1098
   This function also handles reduction idioms (patterns) that have been
1099
   recognized in advance during vect_pattern_recog. In this case, STMT may be
1100
   of this form:
1101
     X = pattern_expr (arg0, arg1, ..., X)
1102
   and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
1103
   sequence that had been detected and replaced by the pattern-stmt (STMT).
1104
 
1105
   In some cases of reduction patterns, the type of the reduction variable X is
1106
   different than the type of the other arguments of STMT.
1107
   In such cases, the vectype that is used when transforming STMT into a vector
1108
   stmt is different than the vectype that is used to determine the
1109
   vectorization factor, because it consists of a different number of elements
1110
   than the actual number of elements that are being operated upon in parallel.
1111
 
1112
   For example, consider an accumulation of shorts into an int accumulator.
1113
   On some targets it's possible to vectorize this pattern operating on 8
1114
   shorts at a time (hence, the vectype for purposes of determining the
1115
   vectorization factor should be V8HI); on the other hand, the vectype that
1116
   is used to create the vector form is actually V4SI (the type of the result).
1117
 
1118
   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1119
   indicates what is the actual level of parallelism (V8HI in the example), so
1120
   that the right vectorization factor would be derived. This vectype
1121
   corresponds to the type of arguments to the reduction stmt, and should *NOT*
1122
   be used to create the vectorized stmt. The right vectype for the vectorized
1123
   stmt is obtained from the type of the result X:
1124
        get_vectype_for_scalar_type (TREE_TYPE (X))
1125
 
1126
   This means that, contrary to "regular" reductions (or "regular" stmts in
1127
   general), the following equation:
1128
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1129
   does *NOT* necessarily hold for reduction patterns.  */
1130
 
1131
bool
1132
vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1133
{
1134
  tree vec_dest;
1135
  tree scalar_dest;
1136
  tree op;
1137
  tree loop_vec_def0, loop_vec_def1;
1138
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1139
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1140
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1141
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1142
  tree operation;
1143
  enum tree_code code, orig_code, epilog_reduc_code = 0;
1144
  enum machine_mode vec_mode;
1145
  int op_type;
1146
  optab optab, reduc_optab;
1147
  tree new_temp;
1148
  tree def, def_stmt;
1149
  enum vect_def_type dt;
1150
  tree new_phi;
1151
  tree scalar_type;
1152
  bool is_simple_use;
1153
  tree orig_stmt;
1154
  stmt_vec_info orig_stmt_info;
1155
  tree expr = NULL_TREE;
1156
  int i;
1157
 
1158
  /* 1. Is vectorizable reduction?  */
1159
 
1160
  /* Not supportable if the reduction variable is used in the loop.  */
1161
  if (STMT_VINFO_RELEVANT_P (stmt_info))
1162
    return false;
1163
 
1164
  if (!STMT_VINFO_LIVE_P (stmt_info))
1165
    return false;
1166
 
1167
  /* Make sure it was already recognized as a reduction computation.  */
1168
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1169
    return false;
1170
 
1171
  /* 2. Has this been recognized as a reduction pattern?
1172
 
1173
     Check if STMT represents a pattern that has been recognized
1174
     in earlier analysis stages.  For stmts that represent a pattern,
1175
     the STMT_VINFO_RELATED_STMT field records the last stmt in
1176
     the original sequence that constitutes the pattern.  */
1177
 
1178
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1179
  if (orig_stmt)
1180
    {
1181
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
1182
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1183
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1184
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1185
    }
1186
 
1187
  /* 3. Check the operands of the operation. The first operands are defined
1188
        inside the loop body. The last operand is the reduction variable,
1189
        which is defined by the loop-header-phi.  */
1190
 
1191
  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1192
 
1193
  operation = TREE_OPERAND (stmt, 1);
1194
  code = TREE_CODE (operation);
1195
  op_type = TREE_CODE_LENGTH (code);
1196
 
1197
  if (op_type != binary_op && op_type != ternary_op)
1198
    return false;
1199
  scalar_dest = TREE_OPERAND (stmt, 0);
1200
  scalar_type = TREE_TYPE (scalar_dest);
1201
 
1202
  /* All uses but the last are expected to be defined in the loop.
1203
     The last use is the reduction variable.  */
1204
  for (i = 0; i < op_type-1; i++)
1205
    {
1206
      op = TREE_OPERAND (operation, i);
1207
      is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1208
      gcc_assert (is_simple_use);
1209
      gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
1210
                  dt == vect_constant_def);
1211
    }
1212
 
1213
  op = TREE_OPERAND (operation, i);
1214
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1215
  gcc_assert (is_simple_use);
1216
  gcc_assert (dt == vect_reduction_def);
1217
  gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1218
  if (orig_stmt)
1219
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
1220
  else
1221
    gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
1222
 
1223
  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
1224
    return false;
1225
 
1226
  /* 4. Supportable by target?  */
1227
 
1228
  /* 4.1. check support for the operation in the loop  */
1229
  optab = optab_for_tree_code (code, vectype);
1230
  if (!optab)
1231
    {
1232
      if (vect_print_dump_info (REPORT_DETAILS))
1233
        fprintf (vect_dump, "no optab.");
1234
      return false;
1235
    }
1236
  vec_mode = TYPE_MODE (vectype);
1237
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1238
    {
1239
      if (vect_print_dump_info (REPORT_DETAILS))
1240
        fprintf (vect_dump, "op not supported by target.");
1241
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1242
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1243
             < vect_min_worthwhile_factor (code))
1244
        return false;
1245
      if (vect_print_dump_info (REPORT_DETAILS))
1246
        fprintf (vect_dump, "proceeding using word mode.");
1247
    }
1248
 
1249
  /* Worthwhile without SIMD support?  */
1250
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1251
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1252
         < vect_min_worthwhile_factor (code))
1253
    {
1254
      if (vect_print_dump_info (REPORT_DETAILS))
1255
        fprintf (vect_dump, "not worthwhile without SIMD support.");
1256
      return false;
1257
    }
1258
 
1259
  /* 4.2. Check support for the epilog operation.
1260
 
1261
          If STMT represents a reduction pattern, then the type of the
1262
          reduction variable may be different than the type of the rest
1263
          of the arguments.  For example, consider the case of accumulation
1264
          of shorts into an int accumulator; The original code:
1265
                        S1: int_a = (int) short_a;
1266
          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
1267
 
1268
          was replaced with:
1269
                        STMT: int_acc = widen_sum <short_a, int_acc>
1270
 
1271
          This means that:
1272
          1. The tree-code that is used to create the vector operation in the
1273
             epilog code (that reduces the partial results) is not the
1274
             tree-code of STMT, but is rather the tree-code of the original
1275
             stmt from the pattern that STMT is replacing. I.e, in the example
1276
             above we want to use 'widen_sum' in the loop, but 'plus' in the
1277
             epilog.
1278
          2. The type (mode) we use to check available target support
1279
             for the vector operation to be created in the *epilog*, is
1280
             determined by the type of the reduction variable (in the example
1281
             above we'd check this: plus_optab[vect_int_mode]).
1282
             However the type (mode) we use to check available target support
1283
             for the vector operation to be created *inside the loop*, is
1284
             determined by the type of the other arguments to STMT (in the
1285
             example we'd check this: widen_sum_optab[vect_short_mode]).
1286
 
1287
          This is contrary to "regular" reductions, in which the types of all
1288
          the arguments are the same as the type of the reduction variable.
1289
          For "regular" reductions we can therefore use the same vector type
1290
          (and also the same tree-code) when generating the epilog code and
1291
          when generating the code inside the loop.  */
1292
 
1293
  if (orig_stmt)
1294
    {
1295
      /* This is a reduction pattern: get the vectype from the type of the
1296
         reduction variable, and get the tree-code from orig_stmt.  */
1297
      orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
1298
      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
1299
      vec_mode = TYPE_MODE (vectype);
1300
    }
1301
  else
1302
    {
1303
      /* Regular reduction: use the same vectype and tree-code as used for
1304
         the vector code inside the loop can be used for the epilog code. */
1305
      orig_code = code;
1306
    }
1307
 
1308
  if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
1309
    return false;
1310
  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
1311
  if (!reduc_optab)
1312
    {
1313
      if (vect_print_dump_info (REPORT_DETAILS))
1314
        fprintf (vect_dump, "no optab for reduction.");
1315
      epilog_reduc_code = NUM_TREE_CODES;
1316
    }
1317
  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1318
    {
1319
      if (vect_print_dump_info (REPORT_DETAILS))
1320
        fprintf (vect_dump, "reduc op not supported by target.");
1321
      epilog_reduc_code = NUM_TREE_CODES;
1322
    }
1323
 
1324
  if (!vec_stmt) /* transformation not required.  */
1325
    {
1326
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1327
      return true;
1328
    }
1329
 
1330
  /** Transform.  **/
1331
 
1332
  if (vect_print_dump_info (REPORT_DETAILS))
1333
    fprintf (vect_dump, "transform reduction.");
1334
 
1335
  /* Create the destination vector  */
1336
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1337
 
1338
  /* Create the reduction-phi that defines the reduction-operand.  */
1339
  new_phi = create_phi_node (vec_dest, loop->header);
1340
 
1341
  /* Prepare the operand that is defined inside the loop body  */
1342
  op = TREE_OPERAND (operation, 0);
1343
  loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
1344
  if (op_type == binary_op)
1345
    expr = build2 (code, vectype, loop_vec_def0, PHI_RESULT (new_phi));
1346
  else if (op_type == ternary_op)
1347
    {
1348
      op = TREE_OPERAND (operation, 1);
1349
      loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1350
      expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
1351
                     PHI_RESULT (new_phi));
1352
    }
1353
 
1354
  /* Create the vectorized operation that computes the partial results  */
1355
  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr);
1356
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1357
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1358
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1359
 
1360
  /* Finalize the reduction-phi (set it's arguments) and create the
1361
     epilog reduction code.  */
1362
  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
1363
  return true;
1364
}
1365
 
1366
 
1367
/* Function vectorizable_assignment.
1368
 
1369
   Check if STMT performs an assignment (copy) that can be vectorized.
1370
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1371
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1372
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1373
 
1374
bool
1375
vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1376
{
1377
  tree vec_dest;
1378
  tree scalar_dest;
1379
  tree op;
1380
  tree vec_oprnd;
1381
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1382
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1383
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1384
  tree new_temp;
1385
  tree def, def_stmt;
1386
  enum vect_def_type dt;
1387
 
1388
  /* Is vectorizable assignment?  */
1389
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1390
    return false;
1391
 
1392
  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1393
 
1394
  if (TREE_CODE (stmt) != MODIFY_EXPR)
1395
    return false;
1396
 
1397
  scalar_dest = TREE_OPERAND (stmt, 0);
1398
  if (TREE_CODE (scalar_dest) != SSA_NAME)
1399
    return false;
1400
 
1401
  op = TREE_OPERAND (stmt, 1);
1402
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1403
    {
1404
      if (vect_print_dump_info (REPORT_DETAILS))
1405
        fprintf (vect_dump, "use not simple.");
1406
      return false;
1407
    }
1408
 
1409
  if (!vec_stmt) /* transformation not required.  */
1410
    {
1411
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1412
      return true;
1413
    }
1414
 
1415
  /** Transform.  **/
1416
  if (vect_print_dump_info (REPORT_DETAILS))
1417
    fprintf (vect_dump, "transform assignment.");
1418
 
1419
  /* Handle def.  */
1420
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1421
 
1422
  /* Handle use.  */
1423
  op = TREE_OPERAND (stmt, 1);
1424
  vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1425
 
1426
  /* Arguments are ready. create the new vector stmt.  */
1427
  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1428
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1429
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1430
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1431
 
1432
  return true;
1433
}
1434
 
1435
 
1436
/* Function vect_min_worthwhile_factor.
1437
 
1438
   For a loop where we could vectorize the operation indicated by CODE,
1439
   return the minimum vectorization factor that makes it worthwhile
1440
   to use generic vectors.  */
1441
static int
1442
vect_min_worthwhile_factor (enum tree_code code)
1443
{
1444
  switch (code)
1445
    {
1446
    case PLUS_EXPR:
1447
    case MINUS_EXPR:
1448
    case NEGATE_EXPR:
1449
      return 4;
1450
 
1451
    case BIT_AND_EXPR:
1452
    case BIT_IOR_EXPR:
1453
    case BIT_XOR_EXPR:
1454
    case BIT_NOT_EXPR:
1455
      return 2;
1456
 
1457
    default:
1458
      return INT_MAX;
1459
    }
1460
}
1461
 
1462
 
1463
/* Function vectorizable_operation.
1464
 
1465
   Check if STMT performs a binary or unary operation that can be vectorized.
1466
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1467
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1468
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1469
 
1470
bool
1471
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1472
{
1473
  tree vec_dest;
1474
  tree scalar_dest;
1475
  tree operation;
1476
  tree op0, op1 = NULL;
1477
  tree vec_oprnd0, vec_oprnd1=NULL;
1478
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1479
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1480
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1481
  int i;
1482
  enum tree_code code;
1483
  enum machine_mode vec_mode;
1484
  tree new_temp;
1485
  int op_type;
1486
  tree op;
1487
  optab optab;
1488
  int icode;
1489
  enum machine_mode optab_op2_mode;
1490
  tree def, def_stmt;
1491
  enum vect_def_type dt;
1492
 
1493
  /* Is STMT a vectorizable binary/unary operation?   */
1494
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1495
    return false;
1496
 
1497
  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1498
 
1499
  if (STMT_VINFO_LIVE_P (stmt_info))
1500
    {
1501
      /* FORNOW: not yet supported.  */
1502
      if (vect_print_dump_info (REPORT_DETAILS))
1503
        fprintf (vect_dump, "value used after loop.");
1504
      return false;
1505
    }
1506
 
1507
  if (TREE_CODE (stmt) != MODIFY_EXPR)
1508
    return false;
1509
 
1510
  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1511
    return false;
1512
 
1513
  operation = TREE_OPERAND (stmt, 1);
1514
  code = TREE_CODE (operation);
1515
  optab = optab_for_tree_code (code, vectype);
1516
 
1517
  /* Support only unary or binary operations.  */
1518
  op_type = TREE_CODE_LENGTH (code);
1519
  if (op_type != unary_op && op_type != binary_op)
1520
    {
1521
      if (vect_print_dump_info (REPORT_DETAILS))
1522
        fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1523
      return false;
1524
    }
1525
 
1526
  for (i = 0; i < op_type; i++)
1527
    {
1528
      op = TREE_OPERAND (operation, i);
1529
      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1530
        {
1531
          if (vect_print_dump_info (REPORT_DETAILS))
1532
            fprintf (vect_dump, "use not simple.");
1533
          return false;
1534
        }
1535
    }
1536
 
1537
  /* Supportable by target?  */
1538
  if (!optab)
1539
    {
1540
      if (vect_print_dump_info (REPORT_DETAILS))
1541
        fprintf (vect_dump, "no optab.");
1542
      return false;
1543
    }
1544
  vec_mode = TYPE_MODE (vectype);
1545
  icode = (int) optab->handlers[(int) vec_mode].insn_code;
1546
  if (icode == CODE_FOR_nothing)
1547
    {
1548
      if (vect_print_dump_info (REPORT_DETAILS))
1549
        fprintf (vect_dump, "op not supported by target.");
1550
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1551
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1552
             < vect_min_worthwhile_factor (code))
1553
        return false;
1554
      if (vect_print_dump_info (REPORT_DETAILS))
1555
        fprintf (vect_dump, "proceeding using word mode.");
1556
    }
1557
 
1558
  /* Worthwhile without SIMD support?  */
1559
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1560
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1561
         < vect_min_worthwhile_factor (code))
1562
    {
1563
      if (vect_print_dump_info (REPORT_DETAILS))
1564
        fprintf (vect_dump, "not worthwhile without SIMD support.");
1565
      return false;
1566
    }
1567
 
1568
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1569
    {
1570
      /* FORNOW: not yet supported.  */
1571
      if (!VECTOR_MODE_P (vec_mode))
1572
        return false;
1573
 
1574
      /* Invariant argument is needed for a vector shift
1575
         by a scalar shift operand.  */
1576
      optab_op2_mode = insn_data[icode].operand[2].mode;
1577
      if (! (VECTOR_MODE_P (optab_op2_mode)
1578
             || dt == vect_constant_def
1579
             || dt == vect_invariant_def))
1580
        {
1581
          if (vect_print_dump_info (REPORT_DETAILS))
1582
            fprintf (vect_dump, "operand mode requires invariant argument.");
1583
          return false;
1584
        }
1585
    }
1586
 
1587
  if (!vec_stmt) /* transformation not required.  */
1588
    {
1589
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1590
      return true;
1591
    }
1592
 
1593
  /** Transform.  **/
1594
 
1595
  if (vect_print_dump_info (REPORT_DETAILS))
1596
    fprintf (vect_dump, "transform binary/unary operation.");
1597
 
1598
  /* Handle def.  */
1599
  scalar_dest = TREE_OPERAND (stmt, 0);
1600
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1601
 
1602
  /* Handle uses.  */
1603
  op0 = TREE_OPERAND (operation, 0);
1604
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1605
 
1606
  if (op_type == binary_op)
1607
    {
1608
      op1 = TREE_OPERAND (operation, 1);
1609
 
1610
      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1611
        {
1612
          /* Vector shl and shr insn patterns can be defined with
1613
             scalar operand 2 (shift operand).  In this case, use
1614
             constant or loop invariant op1 directly, without
1615
             extending it to vector mode first.  */
1616
 
1617
          optab_op2_mode = insn_data[icode].operand[2].mode;
1618
          if (!VECTOR_MODE_P (optab_op2_mode))
1619
            {
1620
              if (vect_print_dump_info (REPORT_DETAILS))
1621
                fprintf (vect_dump, "operand 1 using scalar mode.");
1622
              vec_oprnd1 = op1;
1623
            }
1624
        }
1625
 
1626
      if (!vec_oprnd1)
1627
        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1628
    }
1629
 
1630
  /* Arguments are ready. create the new vector stmt.  */
1631
 
1632
  if (op_type == binary_op)
1633
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1634
                build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1635
  else
1636
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1637
                build1 (code, vectype, vec_oprnd0));
1638
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1639
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1640
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1641
 
1642
  return true;
1643
}
1644
 
1645
 
1646
/* Function vectorizable_store.
1647
 
1648
   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1649
   can be vectorized.
1650
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1651
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1652
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1653
 
1654
bool
1655
vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1656
{
1657
  tree scalar_dest;
1658
  tree data_ref;
1659
  tree op;
1660
  tree vec_oprnd1;
1661
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1662
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1663
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1664
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1665
  enum machine_mode vec_mode;
1666
  tree dummy;
1667
  enum dr_alignment_support alignment_support_cheme;
1668
  ssa_op_iter iter;
1669
  tree def, def_stmt;
1670
  enum vect_def_type dt;
1671
 
1672
  /* Is vectorizable store? */
1673
 
1674
  if (TREE_CODE (stmt) != MODIFY_EXPR)
1675
    return false;
1676
 
1677
  scalar_dest = TREE_OPERAND (stmt, 0);
1678
  if (TREE_CODE (scalar_dest) != ARRAY_REF
1679
      && TREE_CODE (scalar_dest) != INDIRECT_REF)
1680
    return false;
1681
 
1682
  op = TREE_OPERAND (stmt, 1);
1683
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1684
    {
1685
      if (vect_print_dump_info (REPORT_DETAILS))
1686
        fprintf (vect_dump, "use not simple.");
1687
      return false;
1688
    }
1689
 
1690
  vec_mode = TYPE_MODE (vectype);
1691
  /* FORNOW. In some cases can vectorize even if data-type not supported
1692
     (e.g. - array initialization with 0).  */
1693
  if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1694
    return false;
1695
 
1696
  if (!STMT_VINFO_DATA_REF (stmt_info))
1697
    return false;
1698
 
1699
 
1700
  if (!vec_stmt) /* transformation not required.  */
1701
    {
1702
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1703
      return true;
1704
    }
1705
 
1706
  /** Transform.  **/
1707
 
1708
  if (vect_print_dump_info (REPORT_DETAILS))
1709
    fprintf (vect_dump, "transform store");
1710
 
1711
  alignment_support_cheme = vect_supportable_dr_alignment (dr);
1712
  gcc_assert (alignment_support_cheme);
1713
  gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */
1714
 
1715
  /* Handle use - get the vectorized def from the defining stmt.  */
1716
  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1717
 
1718
  /* Handle def.  */
1719
  /* FORNOW: make sure the data reference is aligned.  */
1720
  vect_align_data_ref (stmt);
1721
  data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1722
  data_ref = build_fold_indirect_ref (data_ref);
1723
 
1724
  /* Arguments are ready. create the new vector stmt.  */
1725
  *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1726
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1727
 
1728
  /* Copy the V_MAY_DEFS representing the aliasing of the original array
1729
     element's definition to the vector's definition then update the
1730
     defining statement.  The original is being deleted so the same
1731
     SSA_NAMEs can be used.  */
1732
  copy_virtual_operands (*vec_stmt, stmt);
1733
 
1734
  FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1735
    {
1736
      SSA_NAME_DEF_STMT (def) = *vec_stmt;
1737
 
1738
      /* If this virtual def has a use outside the loop and a loop peel is
1739
         performed then the def may be renamed by the peel.  Mark it for
1740
         renaming so the later use will also be renamed.  */
1741
      mark_sym_for_renaming (SSA_NAME_VAR (def));
1742
    }
1743
 
1744
  return true;
1745
}
1746
 
1747
 
1748
/* vectorizable_load.
1749
 
1750
   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1751
   can be vectorized.
1752
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1753
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1754
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1755
 
1756
bool
1757
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1758
{
1759
  tree scalar_dest;
1760
  tree vec_dest = NULL;
1761
  tree data_ref = NULL;
1762
  tree op;
1763
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1764
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1765
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1766
  tree new_temp;
1767
  int mode;
1768
  tree init_addr;
1769
  tree new_stmt;
1770
  tree dummy;
1771
  basic_block new_bb;
1772
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1773
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1774
  edge pe = loop_preheader_edge (loop);
1775
  enum dr_alignment_support alignment_support_cheme;
1776
 
1777
  /* Is vectorizable load? */
1778
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1779
    return false;
1780
 
1781
  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1782
 
1783
  if (STMT_VINFO_LIVE_P (stmt_info))
1784
    {
1785
      /* FORNOW: not yet supported.  */
1786
      if (vect_print_dump_info (REPORT_DETAILS))
1787
        fprintf (vect_dump, "value used after loop.");
1788
      return false;
1789
    }
1790
 
1791
  if (TREE_CODE (stmt) != MODIFY_EXPR)
1792
    return false;
1793
 
1794
  scalar_dest = TREE_OPERAND (stmt, 0);
1795
  if (TREE_CODE (scalar_dest) != SSA_NAME)
1796
    return false;
1797
 
1798
  op = TREE_OPERAND (stmt, 1);
1799
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1800
    return false;
1801
 
1802
  if (!STMT_VINFO_DATA_REF (stmt_info))
1803
    return false;
1804
 
1805
  mode = (int) TYPE_MODE (vectype);
1806
 
1807
  /* FORNOW. In some cases can vectorize even if data-type not supported
1808
    (e.g. - data copies).  */
1809
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1810
    {
1811
      if (vect_print_dump_info (REPORT_DETAILS))
1812
        fprintf (vect_dump, "Aligned load, but unsupported type.");
1813
      return false;
1814
    }
1815
 
1816
  if (!vec_stmt) /* transformation not required.  */
1817
    {
1818
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1819
      return true;
1820
    }
1821
 
1822
  /** Transform.  **/
1823
 
1824
  if (vect_print_dump_info (REPORT_DETAILS))
1825
    fprintf (vect_dump, "transform load.");
1826
 
1827
  alignment_support_cheme = vect_supportable_dr_alignment (dr);
1828
  gcc_assert (alignment_support_cheme);
1829
 
1830
  if (alignment_support_cheme == dr_aligned
1831
      || alignment_support_cheme == dr_unaligned_supported)
1832
    {
1833
      /* Create:
1834
         p = initial_addr;
1835
         indx = 0;
1836
         loop {
1837
           vec_dest = *(p);
1838
           indx = indx + 1;
1839
         }
1840
      */
1841
 
1842
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
1843
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1844
      if (aligned_access_p (dr))
1845
        data_ref = build_fold_indirect_ref (data_ref);
1846
      else
1847
        {
1848
          int mis = DR_MISALIGNMENT (dr);
1849
          tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1850
          tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1851
          data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1852
        }
1853
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1854
      new_temp = make_ssa_name (vec_dest, new_stmt);
1855
      TREE_OPERAND (new_stmt, 0) = new_temp;
1856
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
1857
      copy_virtual_operands (new_stmt, stmt);
1858
    }
1859
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1860
    {
1861
      /* Create:
1862
         p1 = initial_addr;
1863
         msq_init = *(floor(p1))
1864
         p2 = initial_addr + VS - 1;
1865
         magic = have_builtin ? builtin_result : initial_address;
1866
         indx = 0;
1867
         loop {
1868
           p2' = p2 + indx * vectype_size
1869
           lsq = *(floor(p2'))
1870
           vec_dest = realign_load (msq, lsq, magic)
1871
           indx = indx + 1;
1872
           msq = lsq;
1873
         }
1874
      */
1875
 
1876
      tree offset;
1877
      tree magic;
1878
      tree phi_stmt;
1879
      tree msq_init;
1880
      tree msq, lsq;
1881
      tree dataref_ptr;
1882
      tree params;
1883
 
1884
      /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
1885
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
1886
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1887
                                           &init_addr, true);
1888
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1889
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1890
      new_temp = make_ssa_name (vec_dest, new_stmt);
1891
      TREE_OPERAND (new_stmt, 0) = new_temp;
1892
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1893
      gcc_assert (!new_bb);
1894
      msq_init = TREE_OPERAND (new_stmt, 0);
1895
      copy_virtual_operands (new_stmt, stmt);
1896
      update_vuses_to_preheader (new_stmt, loop);
1897
 
1898
 
1899
      /* <2> Create lsq = *(floor(p2')) in the loop  */
1900
      offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
1901
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
1902
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1903
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1904
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1905
      new_temp = make_ssa_name (vec_dest, new_stmt);
1906
      TREE_OPERAND (new_stmt, 0) = new_temp;
1907
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
1908
      lsq = TREE_OPERAND (new_stmt, 0);
1909
      copy_virtual_operands (new_stmt, stmt);
1910
 
1911
 
1912
      /* <3> */
1913
      if (targetm.vectorize.builtin_mask_for_load)
1914
        {
1915
          /* Create permutation mask, if required, in loop preheader.  */
1916
          tree builtin_decl;
1917
          params = build_tree_list (NULL_TREE, init_addr);
1918
          vec_dest = vect_create_destination_var (scalar_dest, vectype);
1919
          builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1920
          new_stmt = build_function_call_expr (builtin_decl, params);
1921
          new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1922
          new_temp = make_ssa_name (vec_dest, new_stmt);
1923
          TREE_OPERAND (new_stmt, 0) = new_temp;
1924
          new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1925
          gcc_assert (!new_bb);
1926
          magic = TREE_OPERAND (new_stmt, 0);
1927
 
1928
          /* The result of the CALL_EXPR to this builtin is determined from
1929
             the value of the parameter and no global variables are touched
1930
             which makes the builtin a "const" function.  Requiring the
1931
             builtin to have the "const" attribute makes it unnecessary
1932
             to call mark_call_clobbered.  */
1933
          gcc_assert (TREE_READONLY (builtin_decl));
1934
        }
1935
      else
1936
        {
1937
          /* Use current address instead of init_addr for reduced reg pressure.
1938
           */
1939
          magic = dataref_ptr;
1940
        }
1941
 
1942
 
1943
      /* <4> Create msq = phi <msq_init, lsq> in loop  */
1944
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
1945
      msq = make_ssa_name (vec_dest, NULL_TREE);
1946
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1947
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
1948
      add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1949
      add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1950
 
1951
 
1952
      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
1953
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
1954
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1955
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1956
      new_temp = make_ssa_name (vec_dest, new_stmt);
1957
      TREE_OPERAND (new_stmt, 0) = new_temp;
1958
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
1959
    }
1960
  else
1961
    gcc_unreachable ();
1962
 
1963
  *vec_stmt = new_stmt;
1964
  return true;
1965
}
1966
 
1967
 
1968
/* Function vectorizable_live_operation.
1969
 
1970
   STMT computes a value that is used outside the loop. Check if
1971
   it can be supported.  */
1972
 
1973
bool
1974
vectorizable_live_operation (tree stmt,
1975
                             block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1976
                             tree *vec_stmt ATTRIBUTE_UNUSED)
1977
{
1978
  tree operation;
1979
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1980
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1981
  int i;
1982
  enum tree_code code;
1983
  int op_type;
1984
  tree op;
1985
  tree def, def_stmt;
1986
  enum vect_def_type dt;
1987
 
1988
  if (!STMT_VINFO_LIVE_P (stmt_info))
1989
    return false;
1990
 
1991
  if (TREE_CODE (stmt) != MODIFY_EXPR)
1992
    return false;
1993
 
1994
  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1995
    return false;
1996
 
1997
  operation = TREE_OPERAND (stmt, 1);
1998
  code = TREE_CODE (operation);
1999
 
2000
  op_type = TREE_CODE_LENGTH (code);
2001
 
2002
  /* FORNOW: support only if all uses are invariant. This means
2003
     that the scalar operations can remain in place, unvectorized.
2004
     The original last scalar value that they compute will be used.  */
2005
 
2006
  for (i = 0; i < op_type; i++)
2007
    {
2008
      op = TREE_OPERAND (operation, i);
2009
      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2010
        {
2011
          if (vect_print_dump_info (REPORT_DETAILS))
2012
            fprintf (vect_dump, "use not simple.");
2013
          return false;
2014
        }
2015
 
2016
      if (dt != vect_invariant_def && dt != vect_constant_def)
2017
        return false;
2018
    }
2019
 
2020
  /* No transformation is required for the cases we currently support.  */
2021
  return true;
2022
}
2023
 
2024
 
2025
/* Function vect_is_simple_cond.
2026
 
2027
   Input:
2028
   LOOP - the loop that is being vectorized.
2029
   COND - Condition that is checked for simple use.
2030
 
2031
   Returns whether a COND can be vectorized.  Checks whether
2032
   condition operands are supportable using vec_is_simple_use.  */
2033
 
2034
static bool
2035
vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
2036
{
2037
  tree lhs, rhs;
2038
  tree def;
2039
  enum vect_def_type dt;
2040
 
2041
  if (!COMPARISON_CLASS_P (cond))
2042
    return false;
2043
 
2044
  lhs = TREE_OPERAND (cond, 0);
2045
  rhs = TREE_OPERAND (cond, 1);
2046
 
2047
  if (TREE_CODE (lhs) == SSA_NAME)
2048
    {
2049
      tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
2050
      if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
2051
        return false;
2052
    }
2053
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
2054
    return false;
2055
 
2056
  if (TREE_CODE (rhs) == SSA_NAME)
2057
    {
2058
      tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
2059
      if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
2060
        return false;
2061
    }
2062
  else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST)
2063
    return false;
2064
 
2065
  return true;
2066
}
2067
 
2068
/* vectorizable_condition.
2069
 
2070
   Check if STMT is conditional modify expression that can be vectorized.
2071
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2072
   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
2073
   at BSI.
2074
 
2075
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2076
 
2077
bool
2078
vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2079
{
2080
  tree scalar_dest = NULL_TREE;
2081
  tree vec_dest = NULL_TREE;
2082
  tree op = NULL_TREE;
2083
  tree cond_expr, then_clause, else_clause;
2084
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2085
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2086
  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
2087
  tree vec_compare, vec_cond_expr;
2088
  tree new_temp;
2089
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2090
  enum machine_mode vec_mode;
2091
  tree def;
2092
  enum vect_def_type dt;
2093
 
2094
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
2095
    return false;
2096
 
2097
  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2098
 
2099
  if (STMT_VINFO_LIVE_P (stmt_info))
2100
    {
2101
      /* FORNOW: not yet supported.  */
2102
      if (vect_print_dump_info (REPORT_DETAILS))
2103
        fprintf (vect_dump, "value used after loop.");
2104
      return false;
2105
    }
2106
 
2107
  if (TREE_CODE (stmt) != MODIFY_EXPR)
2108
    return false;
2109
 
2110
  op = TREE_OPERAND (stmt, 1);
2111
 
2112
  if (TREE_CODE (op) != COND_EXPR)
2113
    return false;
2114
 
2115
  cond_expr = TREE_OPERAND (op, 0);
2116
  then_clause = TREE_OPERAND (op, 1);
2117
  else_clause = TREE_OPERAND (op, 2);
2118
 
2119
  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
2120
    return false;
2121
 
2122
  /* We do not handle two different vector types for the condition
2123
     and the values.  */
2124
  if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
2125
    return false;
2126
 
2127
  if (TREE_CODE (then_clause) == SSA_NAME)
2128
    {
2129
      tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
2130
      if (!vect_is_simple_use (then_clause, loop_vinfo,
2131
                               &then_def_stmt, &def, &dt))
2132
        return false;
2133
    }
2134
  else if (TREE_CODE (then_clause) != INTEGER_CST
2135
           && TREE_CODE (then_clause) != REAL_CST)
2136
    return false;
2137
 
2138
  if (TREE_CODE (else_clause) == SSA_NAME)
2139
    {
2140
      tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2141
      if (!vect_is_simple_use (else_clause, loop_vinfo,
2142
                               &else_def_stmt, &def, &dt))
2143
        return false;
2144
    }
2145
  else if (TREE_CODE (else_clause) != INTEGER_CST
2146
           && TREE_CODE (else_clause) != REAL_CST)
2147
    return false;
2148
 
2149
 
2150
  vec_mode = TYPE_MODE (vectype);
2151
 
2152
  if (!vec_stmt)
2153
    {
2154
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2155
      return expand_vec_cond_expr_p (op, vec_mode);
2156
    }
2157
 
2158
  /* Transform */
2159
 
2160
  /* Handle def.  */
2161
  scalar_dest = TREE_OPERAND (stmt, 0);
2162
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
2163
 
2164
  /* Handle cond expr.  */
2165
  vec_cond_lhs =
2166
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2167
  vec_cond_rhs =
2168
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2169
  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2170
  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2171
 
2172
  /* Arguments are ready. create the new vector stmt.  */
2173
  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2174
                        vec_cond_lhs, vec_cond_rhs);
2175
  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
2176
                          vec_compare, vec_then_clause, vec_else_clause);
2177
 
2178
  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2179
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
2180
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
2181
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2182
 
2183
  return true;
2184
}
2185
 
2186
/* Function vect_transform_stmt.
2187
 
2188
   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2189
 
2190
bool
2191
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2192
{
2193
  bool is_store = false;
2194
  tree vec_stmt = NULL_TREE;
2195
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2196
  tree orig_stmt_in_pattern;
2197
  bool done;
2198
 
2199
  if (STMT_VINFO_RELEVANT_P (stmt_info))
2200
    {
2201
      switch (STMT_VINFO_TYPE (stmt_info))
2202
      {
2203
      case op_vec_info_type:
2204
        done = vectorizable_operation (stmt, bsi, &vec_stmt);
2205
        gcc_assert (done);
2206
        break;
2207
 
2208
      case assignment_vec_info_type:
2209
        done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2210
        gcc_assert (done);
2211
        break;
2212
 
2213
      case load_vec_info_type:
2214
        done = vectorizable_load (stmt, bsi, &vec_stmt);
2215
        gcc_assert (done);
2216
        break;
2217
 
2218
      case store_vec_info_type:
2219
        done = vectorizable_store (stmt, bsi, &vec_stmt);
2220
        gcc_assert (done);
2221
        is_store = true;
2222
        break;
2223
 
2224
      case condition_vec_info_type:
2225
        done = vectorizable_condition (stmt, bsi, &vec_stmt);
2226
        gcc_assert (done);
2227
        break;
2228
 
2229
      default:
2230
        if (vect_print_dump_info (REPORT_DETAILS))
2231
          fprintf (vect_dump, "stmt not supported.");
2232
        gcc_unreachable ();
2233
      }
2234
 
2235
      gcc_assert (vec_stmt);
2236
      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2237
      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
2238
      if (orig_stmt_in_pattern)
2239
        {
2240
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
2241
          if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2242
            {
2243
              gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2244
 
2245
              /* STMT was inserted by the vectorizer to replace a computation
2246
                 idiom.  ORIG_STMT_IN_PATTERN is a stmt in the original
2247
                 sequence that computed this idiom.  We need to record a pointer
2248
                 to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN.  See more
2249
                 detail in the documentation of vect_pattern_recog.  */
2250
 
2251
              STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
2252
            }
2253
        }
2254
    }
2255
 
2256
  if (STMT_VINFO_LIVE_P (stmt_info))
2257
    {
2258
      switch (STMT_VINFO_TYPE (stmt_info))
2259
      {
2260
      case reduc_vec_info_type:
2261
        done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2262
        gcc_assert (done);
2263
        break;
2264
 
2265
      default:
2266
        done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2267
        gcc_assert (done);
2268
      }
2269
 
2270
      if (vec_stmt)
2271
        {
2272
          gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2273
          STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2274
        }
2275
    }
2276
 
2277
  return is_store;
2278
}
2279
 
2280
 
2281
/* This function builds ni_name = number of iterations loop executes
2282
   on the loop preheader.  */
2283
 
2284
static tree
2285
vect_build_loop_niters (loop_vec_info loop_vinfo)
2286
{
2287
  tree ni_name, stmt, var;
2288
  edge pe;
2289
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2290
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2291
 
2292
  var = create_tmp_var (TREE_TYPE (ni), "niters");
2293
  add_referenced_var (var);
2294
  ni_name = force_gimple_operand (ni, &stmt, false, var);
2295
 
2296
  pe = loop_preheader_edge (loop);
2297
  if (stmt)
2298
    {
2299
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2300
      gcc_assert (!new_bb);
2301
    }
2302
 
2303
  return ni_name;
2304
}
2305
 
2306
 
2307
/* This function generates the following statements:
2308
 
2309
 ni_name = number of iterations loop executes
2310
 ratio = ni_name / vf
2311
 ratio_mult_vf_name = ratio * vf
2312
 
2313
 and places them at the loop preheader edge.  */
2314
 
2315
static void
2316
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2317
                                 tree *ni_name_ptr,
2318
                                 tree *ratio_mult_vf_name_ptr,
2319
                                 tree *ratio_name_ptr)
2320
{
2321
 
2322
  edge pe;
2323
  basic_block new_bb;
2324
  tree stmt, ni_name;
2325
  tree var;
2326
  tree ratio_name;
2327
  tree ratio_mult_vf_name;
2328
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2329
  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2330
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2331
  tree log_vf;
2332
 
2333
  pe = loop_preheader_edge (loop);
2334
 
2335
  /* Generate temporary variable that contains
2336
     number of iterations loop executes.  */
2337
 
2338
  ni_name = vect_build_loop_niters (loop_vinfo);
2339
  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
2340
 
2341
  /* Create: ratio = ni >> log2(vf) */
2342
 
2343
  var = create_tmp_var (TREE_TYPE (ni), "bnd");
2344
  add_referenced_var (var);
2345
  ratio_name = make_ssa_name (var, NULL_TREE);
2346
  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2347
           build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2348
  SSA_NAME_DEF_STMT (ratio_name) = stmt;
2349
 
2350
  pe = loop_preheader_edge (loop);
2351
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2352
  gcc_assert (!new_bb);
2353
 
2354
  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
2355
 
2356
  var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2357
  add_referenced_var (var);
2358
  ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2359
  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2360
           build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2361
  SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2362
 
2363
  pe = loop_preheader_edge (loop);
2364
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2365
  gcc_assert (!new_bb);
2366
 
2367
  *ni_name_ptr = ni_name;
2368
  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2369
  *ratio_name_ptr = ratio_name;
2370
 
2371
  return;
2372
}
2373
 
2374
 
2375
/* Function update_vuses_to_preheader.
2376
 
2377
   Input:
2378
   STMT - a statement with potential VUSEs.
2379
   LOOP - the loop whose preheader will contain STMT.
2380
 
2381
   It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2382
   appears to be defined in a V_MAY_DEF in another statement in a loop.
2383
   One such case is when the VUSE is at the dereference of a __restricted__
2384
   pointer in a load and the V_MAY_DEF is at the dereference of a different
2385
   __restricted__ pointer in a store.  Vectorization may result in
2386
   copy_virtual_uses being called to copy the problematic VUSE to a new
2387
   statement that is being inserted in the loop preheader.  This procedure
2388
   is called to change the SSA_NAME in the new statement's VUSE from the
2389
   SSA_NAME updated in the loop to the related SSA_NAME available on the
2390
   path entering the loop.
2391
 
2392
   When this function is called, we have the following situation:
2393
 
2394
        # vuse <name1>
2395
        S1: vload
2396
    do {
2397
        # name1 = phi < name0 , name2>
2398
 
2399
        # vuse <name1>
2400
        S2: vload
2401
 
2402
        # name2 = vdef <name1>
2403
        S3: vstore
2404
 
2405
    }while...
2406
 
2407
   Stmt S1 was created in the loop preheader block as part of misaligned-load
2408
   handling. This function fixes the name of the vuse of S1 from 'name1' to
2409
   'name0'.  */
2410
 
2411
static void
2412
update_vuses_to_preheader (tree stmt, struct loop *loop)
2413
{
2414
  basic_block header_bb = loop->header;
2415
  edge preheader_e = loop_preheader_edge (loop);
2416
  ssa_op_iter iter;
2417
  use_operand_p use_p;
2418
 
2419
  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2420
    {
2421
      tree ssa_name = USE_FROM_PTR (use_p);
2422
      tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2423
      tree name_var = SSA_NAME_VAR (ssa_name);
2424
      basic_block bb = bb_for_stmt (def_stmt);
2425
 
2426
      /* For a use before any definitions, def_stmt is a NOP_EXPR.  */
2427
      if (!IS_EMPTY_STMT (def_stmt)
2428
          && flow_bb_inside_loop_p (loop, bb))
2429
        {
2430
          /* If the block containing the statement defining the SSA_NAME
2431
             is in the loop then it's necessary to find the definition
2432
             outside the loop using the PHI nodes of the header.  */
2433
          tree phi;
2434
          bool updated = false;
2435
 
2436
          for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2437
            {
2438
              if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2439
                {
2440
                  SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2441
                  updated = true;
2442
                  break;
2443
                }
2444
            }
2445
          gcc_assert (updated);
2446
        }
2447
    }
2448
}
2449
 
2450
 
2451
/*   Function vect_update_ivs_after_vectorizer.
2452
 
2453
     "Advance" the induction variables of LOOP to the value they should take
2454
     after the execution of LOOP.  This is currently necessary because the
2455
     vectorizer does not handle induction variables that are used after the
2456
     loop.  Such a situation occurs when the last iterations of LOOP are
2457
     peeled, because:
2458
     1. We introduced new uses after LOOP for IVs that were not originally used
2459
        after LOOP: the IVs of LOOP are now used by an epilog loop.
2460
     2. LOOP is going to be vectorized; this means that it will iterate N/VF
2461
        times, whereas the loop IVs should be bumped N times.
2462
 
2463
     Input:
2464
     - LOOP - a loop that is going to be vectorized. The last few iterations
2465
              of LOOP were peeled.
2466
     - NITERS - the number of iterations that LOOP executes (before it is
2467
                vectorized). i.e, the number of times the ivs should be bumped.
2468
     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2469
                  coming out from LOOP on which there are uses of the LOOP ivs
2470
                  (this is the path from LOOP->exit to epilog_loop->preheader).
2471
 
2472
                  The new definitions of the ivs are placed in LOOP->exit.
2473
                  The phi args associated with the edge UPDATE_E in the bb
2474
                  UPDATE_E->dest are updated accordingly.
2475
 
2476
     Assumption 1: Like the rest of the vectorizer, this function assumes
2477
     a single loop exit that has a single predecessor.
2478
 
2479
     Assumption 2: The phi nodes in the LOOP header and in update_bb are
2480
     organized in the same order.
2481
 
2482
     Assumption 3: The access function of the ivs is simple enough (see
2483
     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
2484
 
2485
     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2486
     coming out of LOOP on which the ivs of LOOP are used (this is the path
2487
     that leads to the epilog loop; other paths skip the epilog loop).  This
2488
     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2489
     needs to have its phis updated.
2490
 */
2491
 
2492
static void
2493
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
2494
                                  edge update_e)
2495
{
2496
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2497
  basic_block exit_bb = loop->single_exit->dest;
2498
  tree phi, phi1;
2499
  basic_block update_bb = update_e->dest;
2500
 
2501
  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2502
 
2503
  /* Make sure there exists a single-predecessor exit bb:  */
2504
  gcc_assert (single_pred_p (exit_bb));
2505
 
2506
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2507
       phi && phi1;
2508
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2509
    {
2510
      tree access_fn = NULL;
2511
      tree evolution_part;
2512
      tree init_expr;
2513
      tree step_expr;
2514
      tree var, stmt, ni, ni_name;
2515
      block_stmt_iterator last_bsi;
2516
 
2517
      if (vect_print_dump_info (REPORT_DETAILS))
2518
        {
2519
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
2520
          print_generic_expr (vect_dump, phi, TDF_SLIM);
2521
        }
2522
 
2523
      /* Skip virtual phi's.  */
2524
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2525
        {
2526
          if (vect_print_dump_info (REPORT_DETAILS))
2527
            fprintf (vect_dump, "virtual phi. skip.");
2528
          continue;
2529
        }
2530
 
2531
      /* Skip reduction phis.  */
2532
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2533
        {
2534
          if (vect_print_dump_info (REPORT_DETAILS))
2535
            fprintf (vect_dump, "reduc phi. skip.");
2536
          continue;
2537
        }
2538
 
2539
      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2540
      gcc_assert (access_fn);
2541
      evolution_part =
2542
         unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2543
      gcc_assert (evolution_part != NULL_TREE);
2544
 
2545
      /* FORNOW: We do not support IVs whose evolution function is a polynomial
2546
         of degree >= 2 or exponential.  */
2547
      gcc_assert (!tree_is_chrec (evolution_part));
2548
 
2549
      step_expr = evolution_part;
2550
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2551
                                                               loop->num));
2552
 
2553
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2554
                  build2 (MULT_EXPR, TREE_TYPE (niters),
2555
                       niters, step_expr), init_expr);
2556
 
2557
      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2558
      add_referenced_var (var);
2559
 
2560
      ni_name = force_gimple_operand (ni, &stmt, false, var);
2561
 
2562
      /* Insert stmt into exit_bb.  */
2563
      last_bsi = bsi_last (exit_bb);
2564
      if (stmt)
2565
        bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2566
 
2567
      /* Fix phi expressions in the successor bb.  */
2568
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
2569
    }
2570
}
2571
 
2572
 
2573
/* Function vect_do_peeling_for_loop_bound
2574
 
2575
   Peel the last iterations of the loop represented by LOOP_VINFO.
2576
   The peeled iterations form a new epilog loop.  Given that the loop now
2577
   iterates NITERS times, the new epilog loop iterates
2578
   NITERS % VECTORIZATION_FACTOR times.
2579
 
2580
   The original loop will later be made to iterate
2581
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
2582
 
2583
static void
2584
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2585
                                struct loops *loops)
2586
{
2587
  tree ni_name, ratio_mult_vf_name;
2588
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2589
  struct loop *new_loop;
2590
  edge update_e;
2591
  basic_block preheader;
2592
  int loop_num;
2593
 
2594
  if (vect_print_dump_info (REPORT_DETAILS))
2595
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
2596
 
2597
  initialize_original_copy_tables ();
2598
 
2599
  /* Generate the following variables on the preheader of original loop:
2600
 
2601
     ni_name = number of iteration the original loop executes
2602
     ratio = ni_name / vf
2603
     ratio_mult_vf_name = ratio * vf  */
2604
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2605
                                   &ratio_mult_vf_name, ratio);
2606
 
2607
  loop_num  = loop->num;
2608
  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2609
                                            ratio_mult_vf_name, ni_name, false);
2610
  gcc_assert (new_loop);
2611
  gcc_assert (loop_num == loop->num);
2612
#ifdef ENABLE_CHECKING
2613
  slpeel_verify_cfg_after_peeling (loop, new_loop);
2614
#endif
2615
 
2616
  /* A guard that controls whether the new_loop is to be executed or skipped
2617
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
2618
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
2619
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
2620
     is on the path where the LOOP IVs are used and need to be updated.  */
2621
 
2622
  preheader = loop_preheader_edge (new_loop)->src;
2623
  if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2624
    update_e = EDGE_PRED (preheader, 0);
2625
  else
2626
    update_e = EDGE_PRED (preheader, 1);
2627
 
2628
  /* Update IVs of original loop as if they were advanced
2629
     by ratio_mult_vf_name steps.  */
2630
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2631
 
2632
  /* After peeling we have to reset scalar evolution analyzer.  */
2633
  scev_reset ();
2634
 
2635
  free_original_copy_tables ();
2636
}
2637
 
2638
 
2639
/* Function vect_gen_niters_for_prolog_loop
2640
 
2641
   Set the number of iterations for the loop represented by LOOP_VINFO
2642
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
2643
   and the misalignment of DR - the data reference recorded in
2644
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
2645
   this loop, the data reference DR will refer to an aligned location.
2646
 
2647
   The following computation is generated:
2648
 
2649
   If the misalignment of DR is known at compile time:
2650
     addr_mis = int mis = DR_MISALIGNMENT (dr);
2651
   Else, compute address misalignment in bytes:
2652
     addr_mis = addr & (vectype_size - 1)
2653
 
2654
   prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2655
 
2656
   (elem_size = element type size; an element is the scalar element
2657
        whose type is the inner type of the vectype)  */
2658
 
2659
static tree
2660
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2661
{
2662
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2663
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2664
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2665
  tree var, stmt;
2666
  tree iters, iters_name;
2667
  edge pe;
2668
  basic_block new_bb;
2669
  tree dr_stmt = DR_STMT (dr);
2670
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2671
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2672
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2673
  tree niters_type = TREE_TYPE (loop_niters);
2674
 
2675
  pe = loop_preheader_edge (loop);
2676
 
2677
  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2678
    {
2679
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2680
      int element_size = vectype_align/vf;
2681
      int elem_misalign = byte_misalign / element_size;
2682
 
2683
      if (vect_print_dump_info (REPORT_DETAILS))
2684
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
2685
      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
2686
    }
2687
  else
2688
    {
2689
      tree new_stmts = NULL_TREE;
2690
      tree start_addr =
2691
        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
2692
      tree ptr_type = TREE_TYPE (start_addr);
2693
      tree size = TYPE_SIZE (ptr_type);
2694
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2695
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2696
      tree elem_size_log =
2697
        build_int_cst (type, exact_log2 (vectype_align/vf));
2698
      tree vf_minus_1 = build_int_cst (type, vf - 1);
2699
      tree vf_tree = build_int_cst (type, vf);
2700
      tree byte_misalign;
2701
      tree elem_misalign;
2702
 
2703
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
2704
      gcc_assert (!new_bb);
2705
 
2706
      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
2707
      byte_misalign =
2708
        build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
2709
 
2710
      /* Create:  elem_misalign = byte_misalign / element_size  */
2711
      elem_misalign =
2712
        build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2713
 
2714
      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
2715
      iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
2716
      iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
2717
      iters = fold_convert (niters_type, iters);
2718
    }
2719
 
2720
  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
2721
  /* If the loop bound is known at compile time we already verified that it is
2722
     greater than vf; since the misalignment ('iters') is at most vf, there's
2723
     no need to generate the MIN_EXPR in this case.  */
2724
  if (TREE_CODE (loop_niters) != INTEGER_CST)
2725
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
2726
 
2727
  if (vect_print_dump_info (REPORT_DETAILS))
2728
    {
2729
      fprintf (vect_dump, "niters for prolog loop: ");
2730
      print_generic_expr (vect_dump, iters, TDF_SLIM);
2731
    }
2732
 
2733
  var = create_tmp_var (niters_type, "prolog_loop_niters");
2734
  add_referenced_var (var);
2735
  iters_name = force_gimple_operand (iters, &stmt, false, var);
2736
 
2737
  /* Insert stmt on loop preheader edge.  */
2738
  if (stmt)
2739
    {
2740
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2741
      gcc_assert (!new_bb);
2742
    }
2743
 
2744
  return iters_name;
2745
}
2746
 
2747
 
2748
/* Function vect_update_init_of_dr
2749
 
2750
   NITERS iterations were peeled from LOOP.  DR represents a data reference
2751
   in LOOP.  This function updates the information recorded in DR to
2752
   account for the fact that the first NITERS iterations had already been
2753
   executed.  Specifically, it updates the OFFSET field of DR.  */
2754
 
2755
static void
2756
vect_update_init_of_dr (struct data_reference *dr, tree niters)
2757
{
2758
  tree offset = DR_OFFSET (dr);
2759
 
2760
  niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2761
  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2762
  DR_OFFSET (dr) = offset;
2763
}
2764
 
2765
 
2766
/* Function vect_update_inits_of_drs
2767
 
2768
   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2769
   This function updates the information recorded for the data references in
2770
   the loop to account for the fact that the first NITERS iterations had
2771
   already been executed.  Specifically, it updates the initial_condition of the
2772
   access_function of all the data_references in the loop.  */
2773
 
2774
static void
2775
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2776
{
2777
  unsigned int i;
2778
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2779
  struct data_reference *dr;
2780
 
2781
  if (vect_dump && (dump_flags & TDF_DETAILS))
2782
    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2783
 
2784
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2785
    vect_update_init_of_dr (dr, niters);
2786
}
2787
 
2788
 
2789
/* Function vect_do_peeling_for_alignment
2790
 
2791
   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2792
   'niters' is set to the misalignment of one of the data references in the
2793
   loop, thereby forcing it to refer to an aligned location at the beginning
2794
   of the execution of this loop.  The data reference for which we are
2795
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2796
 
2797
static void
2798
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2799
{
2800
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2801
  tree niters_of_prolog_loop, ni_name;
2802
  tree n_iters;
2803
  struct loop *new_loop;
2804
 
2805
  if (vect_print_dump_info (REPORT_DETAILS))
2806
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2807
 
2808
  initialize_original_copy_tables ();
2809
 
2810
  ni_name = vect_build_loop_niters (loop_vinfo);
2811
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2812
 
2813
  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2814
  new_loop =
2815
        slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2816
                                       niters_of_prolog_loop, ni_name, true);
2817
  gcc_assert (new_loop);
2818
#ifdef ENABLE_CHECKING
2819
  slpeel_verify_cfg_after_peeling (new_loop, loop);
2820
#endif
2821
 
2822
  /* Update number of times loop executes.  */
2823
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2824
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2825
                TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2826
 
2827
  /* Update the init conditions of the access functions of all data refs.  */
2828
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2829
 
2830
  /* After peeling we have to reset scalar evolution analyzer.  */
2831
  scev_reset ();
2832
 
2833
  free_original_copy_tables ();
2834
}
2835
 
2836
 
2837
/* Function vect_create_cond_for_align_checks.
2838
 
2839
   Create a conditional expression that represents the alignment checks for
2840
   all of data references (array element references) whose alignment must be
2841
   checked at runtime.
2842
 
2843
   Input:
2844
   LOOP_VINFO - two fields of the loop information are used.
2845
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2846
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2847
 
2848
   Output:
2849
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2850
                         expression.
2851
   The returned value is the conditional expression to be used in the if
2852
   statement that controls which version of the loop gets executed at runtime.
2853
 
2854
   The algorithm makes two assumptions:
2855
     1) The number of bytes "n" in a vector is a power of 2.
2856
     2) An address "a" is aligned if a%n is zero and that this
2857
        test can be done as a&(n-1) == 0.  For example, for 16
2858
        byte vectors the test is a&0xf == 0.  */
2859
 
2860
static tree
2861
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2862
                                   tree *cond_expr_stmt_list)
2863
{
2864
  VEC(tree,heap) *may_misalign_stmts
2865
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2866
  tree ref_stmt;
2867
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2868
  tree mask_cst;
2869
  unsigned int i;
2870
  tree psize;
2871
  tree int_ptrsize_type;
2872
  char tmp_name[20];
2873
  tree or_tmp_name = NULL_TREE;
2874
  tree and_tmp, and_tmp_name, and_stmt;
2875
  tree ptrsize_zero;
2876
 
2877
  /* Check that mask is one less than a power of 2, i.e., mask is
2878
     all zeros followed by all ones.  */
2879
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2880
 
2881
  /* CHECKME: what is the best integer or unsigned type to use to hold a
2882
     cast from a pointer value?  */
2883
  psize = TYPE_SIZE (ptr_type_node);
2884
  int_ptrsize_type
2885
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2886
 
2887
  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2888
     of the first vector of the i'th data reference. */
2889
 
2890
  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
2891
    {
2892
      tree new_stmt_list = NULL_TREE;
2893
      tree addr_base;
2894
      tree addr_tmp, addr_tmp_name, addr_stmt;
2895
      tree or_tmp, new_or_tmp_name, or_stmt;
2896
 
2897
      /* create: addr_tmp = (int)(address_of_first_vector) */
2898
      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
2899
                                                        &new_stmt_list,
2900
                                                        NULL_TREE);
2901
 
2902
      if (new_stmt_list != NULL_TREE)
2903
        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
2904
 
2905
      sprintf (tmp_name, "%s%d", "addr2int", i);
2906
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2907
      add_referenced_var (addr_tmp);
2908
      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
2909
      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
2910
      addr_stmt = build2 (MODIFY_EXPR, void_type_node,
2911
                          addr_tmp_name, addr_stmt);
2912
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2913
      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
2914
 
2915
      /* The addresses are OR together.  */
2916
 
2917
      if (or_tmp_name != NULL_TREE)
2918
        {
2919
          /* create: or_tmp = or_tmp | addr_tmp */
2920
          sprintf (tmp_name, "%s%d", "orptrs", i);
2921
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2922
          add_referenced_var (or_tmp);
2923
          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
2924
          or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
2925
                            build2 (BIT_IOR_EXPR, int_ptrsize_type,
2926
                                    or_tmp_name,
2927
                                    addr_tmp_name));
2928
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2929
          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
2930
          or_tmp_name = new_or_tmp_name;
2931
        }
2932
      else
2933
        or_tmp_name = addr_tmp_name;
2934
 
2935
    } /* end for i */
2936
 
2937
  mask_cst = build_int_cst (int_ptrsize_type, mask);
2938
 
2939
  /* create: and_tmp = or_tmp & mask  */
2940
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2941
  add_referenced_var (and_tmp);
2942
  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
2943
 
2944
  and_stmt = build2 (MODIFY_EXPR, void_type_node,
2945
                     and_tmp_name,
2946
                     build2 (BIT_AND_EXPR, int_ptrsize_type,
2947
                             or_tmp_name, mask_cst));
2948
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2949
  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
2950
 
2951
  /* Make and_tmp the left operand of the conditional test against zero.
2952
     if and_tmp has a nonzero bit then some address is unaligned.  */
2953
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2954
  return build2 (EQ_EXPR, boolean_type_node,
2955
                 and_tmp_name, ptrsize_zero);
2956
}
2957
 
2958
 
2959
/* Function vect_transform_loop.
2960
 
2961
   The analysis phase has determined that the loop is vectorizable.
2962
   Vectorize the loop - created vectorized stmts to replace the scalar
2963
   stmts in the loop, and update the loop exit condition.  */
2964
 
2965
void
2966
vect_transform_loop (loop_vec_info loop_vinfo,
2967
                     struct loops *loops ATTRIBUTE_UNUSED)
2968
{
2969
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2970
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2971
  int nbbs = loop->num_nodes;
2972
  block_stmt_iterator si;
2973
  int i;
2974
  tree ratio = NULL;
2975
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2976
  bitmap_iterator bi;
2977
  unsigned int j;
2978
 
2979
  if (vect_print_dump_info (REPORT_DETAILS))
2980
    fprintf (vect_dump, "=== vec_transform_loop ===");
2981
 
2982
  /* If the loop has data references that may or may not be aligned then
2983
     two versions of the loop need to be generated, one which is vectorized
2984
     and one which isn't.  A test is then generated to control which of the
2985
     loops is executed.  The test checks for the alignment of all of the
2986
     data references that may or may not be aligned. */
2987
 
2988
  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
2989
    {
2990
      struct loop *nloop;
2991
      tree cond_expr;
2992
      tree cond_expr_stmt_list = NULL_TREE;
2993
      basic_block condition_bb;
2994
      block_stmt_iterator cond_exp_bsi;
2995
      basic_block merge_bb;
2996
      basic_block new_exit_bb;
2997
      edge new_exit_e, e;
2998
      tree orig_phi, new_phi, arg;
2999
 
3000
      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
3001
                                                     &cond_expr_stmt_list);
3002
      initialize_original_copy_tables ();
3003
      nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
3004
      free_original_copy_tables();
3005
 
3006
      /** Loop versioning violates an assumption we try to maintain during
3007
         vectorization - that the loop exit block has a single predecessor.
3008
         After versioning, the exit block of both loop versions is the same
3009
         basic block (i.e. it has two predecessors). Just in order to simplify
3010
         following transformations in the vectorizer, we fix this situation
3011
         here by adding a new (empty) block on the exit-edge of the loop,
3012
         with the proper loop-exit phis to maintain loop-closed-form.  **/
3013
 
3014
      merge_bb = loop->single_exit->dest;
3015
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
3016
      new_exit_bb = split_edge (loop->single_exit);
3017
      add_bb_to_loop (new_exit_bb, loop->outer);
3018
      new_exit_e = loop->single_exit;
3019
      e = EDGE_SUCC (new_exit_bb, 0);
3020
 
3021
      for (orig_phi = phi_nodes (merge_bb); orig_phi;
3022
           orig_phi = PHI_CHAIN (orig_phi))
3023
        {
3024
          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
3025
                                     new_exit_bb);
3026
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3027
          add_phi_arg (new_phi, arg, new_exit_e);
3028
          SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
3029
        }
3030
 
3031
      /** end loop-exit-fixes after versioning  **/
3032
 
3033
      update_ssa (TODO_update_ssa);
3034
      cond_exp_bsi = bsi_last (condition_bb);
3035
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
3036
    }
3037
 
3038
  /* CHECKME: we wouldn't need this if we called update_ssa once
3039
     for all loops.  */
3040
  bitmap_zero (vect_vnames_to_rename);
3041
 
3042
  /* Peel the loop if there are data refs with unknown alignment.
3043
     Only one data ref with unknown store is allowed.  */
3044
 
3045
  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
3046
    vect_do_peeling_for_alignment (loop_vinfo, loops);
3047
 
3048
  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3049
     compile time constant), or it is a constant that doesn't divide by the
3050
     vectorization factor, then an epilog loop needs to be created.
3051
     We therefore duplicate the loop: the original loop will be vectorized,
3052
     and will compute the first (n/VF) iterations. The second copy of the loop
3053
     will remain scalar and will compute the remaining (n%VF) iterations.
3054
     (VF is the vectorization factor).  */
3055
 
3056
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3057
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3058
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3059
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3060
  else
3061
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3062
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3063
 
3064
  /* 1) Make sure the loop header has exactly two entries
3065
     2) Make sure we have a preheader basic block.  */
3066
 
3067
  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3068
 
3069
  loop_split_edge_with (loop_preheader_edge (loop), NULL);
3070
 
3071
 
3072
  /* FORNOW: the vectorizer supports only loops which body consist
3073
     of one basic block (header + empty latch). When the vectorizer will
3074
     support more involved loop forms, the order by which the BBs are
3075
     traversed need to be reconsidered.  */
3076
 
3077
  for (i = 0; i < nbbs; i++)
3078
    {
3079
      basic_block bb = bbs[i];
3080
 
3081
      for (si = bsi_start (bb); !bsi_end_p (si);)
3082
        {
3083
          tree stmt = bsi_stmt (si);
3084
          stmt_vec_info stmt_info;
3085
          bool is_store;
3086
 
3087
          if (vect_print_dump_info (REPORT_DETAILS))
3088
            {
3089
              fprintf (vect_dump, "------>vectorizing statement: ");
3090
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
3091
            }
3092
          stmt_info = vinfo_for_stmt (stmt);
3093
          gcc_assert (stmt_info);
3094
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
3095
              && !STMT_VINFO_LIVE_P (stmt_info))
3096
            {
3097
              bsi_next (&si);
3098
              continue;
3099
            }
3100
          /* FORNOW: Verify that all stmts operate on the same number of
3101
                     units and no inner unrolling is necessary.  */
3102
          gcc_assert
3103
                (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
3104
                 == (unsigned HOST_WIDE_INT) vectorization_factor);
3105
 
3106
          /* -------- vectorize statement ------------ */
3107
          if (vect_print_dump_info (REPORT_DETAILS))
3108
            fprintf (vect_dump, "transform statement.");
3109
 
3110
          is_store = vect_transform_stmt (stmt, &si);
3111
          if (is_store)
3112
            {
3113
              /* Free the attached stmt_vec_info and remove the stmt.  */
3114
              stmt_ann_t ann = stmt_ann (stmt);
3115
              free (stmt_info);
3116
              set_stmt_info (ann, NULL);
3117
              bsi_remove (&si, true);
3118
              continue;
3119
            }
3120
 
3121
          bsi_next (&si);
3122
        }                       /* stmts in BB */
3123
    }                           /* BBs in loop */
3124
 
3125
  slpeel_make_loop_iterate_ntimes (loop, ratio);
3126
 
3127
  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
3128
    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
3129
 
3130
  /* The memory tags and pointers in vectorized statements need to
3131
     have their SSA forms updated.  FIXME, why can't this be delayed
3132
     until all the loops have been transformed?  */
3133
  update_ssa (TODO_update_ssa);
3134
 
3135
  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
3136
    fprintf (vect_dump, "LOOP VECTORIZED.");
3137
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.