/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"

/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt. We generally set the
   vectype according to the type of the result (lhs). For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF. If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion. However, we cannot rely on invariant motion to always take
   invariants out of the loop, and so in the case of promotion we also have to
   check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
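/* For example (hypothetical stmt, for illustration only): for
   'int_x = (int) short_y' the result type is a 4-byte int while the smallest
   scalar type is the 2-byte short; with 16-byte vectors the vectorization
   factor must therefore be derived from the short (VF = 8, not 4), so that
   one vector iteration covers whole vectors of both types.  */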
 
86
 
87
/* Find the place of the data-ref in STMT in the interleaving chain that starts
88
   from FIRST_STMT. Return -1 if the data-ref is not a part of the chain.  */
89
 
90
int
91
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
92
{
93
  gimple next_stmt = first_stmt;
94
  int result = 0;
95
 
96
  if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97
    return -1;
98
 
99
  while (next_stmt && next_stmt != stmt)
100
    {
101
      result++;
102
      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
103
    }
104
 
105
  if (next_stmt)
106
    return result;
107
  else
108
    return -1;
109
}
110
 
111
 
112
/* Function vect_insert_into_interleaving_chain.
113
 
114
   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
115
 
116
static void
117
vect_insert_into_interleaving_chain (struct data_reference *dra,
118
                                     struct data_reference *drb)
119
{
120
  gimple prev, next;
121
  tree next_init;
122
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
124
 
125
  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127
  while (next)
128
    {
129
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
131
        {
132
          /* Insert here.  */
133
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135
          return;
136
        }
137
      prev = next;
138
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
139
    }
140
 
141
  /* We got to the end of the list. Insert here.  */
142
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
144
}
145
 
146
 
147
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are part of a chain of interleaved data
   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
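/* A worked example (hypothetical accesses, for illustration only): suppose a
   loop contains the accesses a[4*i+2], a[4*i], a[4*i+3] and a[4*i+1], whose
   DR_INITs are 2, 0, 3 and 1 times the element size.  The pairwise updates
   performed here build up a single chain ordered by init, 0 -> 1 -> 2 -> 3,
   with the init-0 access as FIRST_DR; which of the four cases fires for a
   given pair depends on the order in which the pairs are examined.  */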
 
170
static void
171
vect_update_interleaving_chain (struct data_reference *drb,
172
                                struct data_reference *dra)
173
{
174
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176
  tree next_init, init_dra_chain, init_drb_chain;
177
  gimple first_a, first_b;
178
  tree node_init;
179
  gimple node, prev, next, first_stmt;
180
 
181
  /* 1. New stmts - both DRA and DRB are not a part of any chain.   */
182
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
183
    {
184
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187
      return;
188
    }
189
 
190
  /* 2. DRB is a part of a chain and DRA is not.  */
191
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
192
    {
193
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194
      /* Insert DRA into the chain of DRB.  */
195
      vect_insert_into_interleaving_chain (dra, drb);
196
      return;
197
    }
198
 
199
  /* 3. DRA is a part of a chain and DRB is not.  */
200
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
201
    {
202
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204
                                                              old_first_stmt)));
205
      gimple tmp;
206
 
207
      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
208
        {
209
          /* DRB's init is smaller than the init of the stmt previously marked
210
             as the first stmt of the interleaving chain of DRA. Therefore, we
211
             update FIRST_STMT and put DRB in the head of the list.  */
212
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
214
 
215
          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
216
          tmp = old_first_stmt;
217
          while (tmp)
218
            {
219
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
221
            }
222
        }
223
      else
224
        {
225
          /* Insert DRB in the list of DRA.  */
226
          vect_insert_into_interleaving_chain (drb, dra);
227
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
228
        }
229
      return;
230
    }
231
 
232
  /* 4. both DRA and DRB are in some interleaving chains.  */
233
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235
  if (first_a == first_b)
236
    return;
237
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
239
 
240
  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
241
    {
242
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
245
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
246
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247
      first_stmt = first_b;
248
    }
249
  else
250
    {
251
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
254
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
255
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256
      first_stmt = first_a;
257
    }
258
 
259
  while (node)
260
    {
261
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263
      while (next)
264
        {
265
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266
          if (tree_int_cst_compare (next_init, node_init) > 0)
267
            {
268
              /* Insert here.  */
269
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271
              prev = node;
272
              break;
273
            }
274
          prev = next;
275
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
276
        }
277
      if (!next)
278
        {
279
          /* We got to the end of the list. Insert here.  */
280
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282
          prev = node;
283
        }
284
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
286
    }
287
}
288
 
289
 
290
/* Function vect_equal_offsets.
291
 
292
   Check if OFFSET1 and OFFSET2 are identical expressions.  */
293
 
294
static bool
295
vect_equal_offsets (tree offset1, tree offset2)
296
{
297
  bool res;
298
 
299
  STRIP_NOPS (offset1);
300
  STRIP_NOPS (offset2);
301
 
302
  if (offset1 == offset2)
303
    return true;
304
 
305
  if (TREE_CODE (offset1) != TREE_CODE (offset2)
306
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
307
    return false;
308
 
309
  res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
310
                            TREE_OPERAND (offset2, 0));
311
 
312
  if (!res || !BINARY_CLASS_P (offset1))
313
    return res;
314
 
315
  res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
316
                            TREE_OPERAND (offset2, 1));
317
 
318
  return res;
319
}
320
 
321
 
322
/* Function vect_check_interleaving.
323
 
324
   Check if DRA and DRB are a part of interleaving. In case they are, insert
325
   DRA and DRB in an interleaving chain.  */
326
 
327
static bool
328
vect_check_interleaving (struct data_reference *dra,
329
                         struct data_reference *drb)
330
{
331
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
332
 
333
  /* Check that the data-refs have the same first location (except for init)
     and that they are either both loads or both stores (not a load and a
     store).  */
335
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
336
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
337
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
338
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
339
           != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
340
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
341
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
342
      || DR_IS_READ (dra) != DR_IS_READ (drb))
343
    return false;
344
 
345
  /* Check:
346
     1. data-refs are of the same type
347
     2. their steps are equal
348
     3. the step (if greater than zero) is greater than the difference between
349
        data-refs' inits.  */
350
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
351
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
352
 
353
  if (type_size_a != type_size_b
354
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
355
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
356
                              TREE_TYPE (DR_REF (drb))))
357
    return false;
358
 
359
  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
360
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
361
  step = TREE_INT_CST_LOW (DR_STEP (dra));
362
 
363
  if (init_a > init_b)
364
    {
365
      /* If init_a == init_b + the size of the type * k, we have an interleaving,
366
         and DRB is accessed before DRA.  */
367
      diff_mod_size = (init_a - init_b) % type_size_a;
368
 
369
      if (step && (init_a - init_b) > step)
370
         return false;
371
 
372
      if (diff_mod_size == 0)
373
        {
374
          vect_update_interleaving_chain (drb, dra);
375
          if (vect_print_dump_info (REPORT_DR_DETAILS))
376
            {
377
              fprintf (vect_dump, "Detected interleaving ");
378
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
379
              fprintf (vect_dump, " and ");
380
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
381
            }
382
          return true;
383
        }
384
    }
385
  else
386
    {
387
      /* If init_b == init_a + the size of the type * k, we have an
388
         interleaving, and DRA is accessed before DRB.  */
389
      diff_mod_size = (init_b - init_a) % type_size_a;
390
 
391
      if (step && (init_b - init_a) > step)
392
         return false;
393
 
394
      if (diff_mod_size == 0)
395
        {
396
          vect_update_interleaving_chain (dra, drb);
397
          if (vect_print_dump_info (REPORT_DR_DETAILS))
398
            {
399
              fprintf (vect_dump, "Detected interleaving ");
400
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
401
              fprintf (vect_dump, " and ");
402
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
403
            }
404
          return true;
405
        }
406
    }
407
 
408
  return false;
409
}
410
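/* As an illustration (hypothetical numbers): with 4-byte elements, equal
   16-byte steps and inits of 8 and 0, the difference of 8 bytes is a multiple
   of the type size and does not exceed the step, so the two references are
   recorded as interleaved with the init-0 access leading the chain.  Had the
   difference been 6, diff_mod_size would be 2 and no interleaving would be
   detected.  */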
 
411
/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if the drs are
   different, otherwise return TRUE.  */
414
 
415
static bool
416
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
417
{
418
  gimple stmt_i = DR_STMT (dr_i);
419
  gimple stmt_j = DR_STMT (dr_j);
420
 
421
  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
422
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
423
            && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
424
            && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
425
                == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
426
    return true;
427
  else
428
    return false;
429
}
430
 
431
/* If address ranges represented by DDR_I and DDR_J are equal,
432
   return TRUE, otherwise return FALSE.  */
433
 
434
static bool
435
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
436
{
437
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
438
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
439
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
440
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
441
    return true;
442
  else
443
    return false;
444
}
445
 
446
/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
447
   tested at run-time.  Return TRUE if DDR was successfully inserted.
448
   Return false if versioning is not supported.  */
449
 
450
static bool
451
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
452
{
453
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
454
 
455
  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
456
    return false;
457
 
458
  if (vect_print_dump_info (REPORT_DR_DETAILS))
459
    {
460
      fprintf (vect_dump, "mark for run-time aliasing test between ");
461
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
462
      fprintf (vect_dump, " and ");
463
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
464
    }
465
 
466
  if (optimize_loop_nest_for_size_p (loop))
467
    {
468
      if (vect_print_dump_info (REPORT_DR_DETAILS))
469
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
470
      return false;
471
    }
472
 
473
  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
474
  if (loop->inner)
475
    {
476
      if (vect_print_dump_info (REPORT_DR_DETAILS))
477
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
478
      return false;
479
    }
480
 
481
  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
482
  return true;
483
}
484
 
485
 
486
/* Function vect_analyze_data_ref_dependence.
487
 
488
   Return TRUE if there is (or might be) a dependence between the memory
   references DRA and DRB.  When the dependence can instead be checked at
   run time by versioning for alias, return FALSE.  */
491
 
492
static bool
493
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
494
                                  loop_vec_info loop_vinfo)
495
{
496
  unsigned int i;
497
  struct loop *loop = NULL;
498
  int vectorization_factor = 0;
499
  struct data_reference *dra = DDR_A (ddr);
500
  struct data_reference *drb = DDR_B (ddr);
501
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
502
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
503
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
504
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
505
  lambda_vector dist_v;
506
  unsigned int loop_depth;
507
 
508
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
509
    {
510
      /* Independent data accesses.  */
511
      vect_check_interleaving (dra, drb);
512
      return false;
513
    }
514
 
515
  if (loop_vinfo)
516
    {
517
      loop = LOOP_VINFO_LOOP (loop_vinfo);
518
      vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
519
    }
520
 
521
  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
522
    return false;
523
 
524
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
525
    {
526
      if (loop_vinfo)
527
        {
528
          if (vect_print_dump_info (REPORT_DR_DETAILS))
529
            {
530
              fprintf (vect_dump, "versioning for alias required: "
531
                                  "can't determine dependence between ");
532
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
533
              fprintf (vect_dump, " and ");
534
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
535
            }
536
 
537
          /* Add to list of ddrs that need to be tested at run-time.  */
538
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
539
        }
540
 
541
      /* When vectorizing a basic block, an unknown dependence can still mean
         strided access.  */
543
      if (vect_check_interleaving (dra, drb))
544
         return false;
545
 
546
      if (vect_print_dump_info (REPORT_DR_DETAILS))
547
        {
548
          fprintf (vect_dump, "can't determine dependence between ");
549
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
550
          fprintf (vect_dump, " and ");
551
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
552
        }
553
 
554
      return true;
555
    }
556
 
557
  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is not applicable; hence, in case of a known data
     dependence, basic block vectorization is impossible for now.  */
560
  if (!loop_vinfo)
561
    {
562
      if (dra != drb && vect_check_interleaving (dra, drb))
563
        return false;
564
 
565
      if (vect_print_dump_info (REPORT_DR_DETAILS))
566
        {
567
          fprintf (vect_dump, "determined dependence between ");
568
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
569
          fprintf (vect_dump, " and ");
570
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
571
        }
572
 
573
      return true;
574
    }
575
 
576
  /* Loop-based vectorization and known data dependence.  */
577
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
578
    {
579
      if (vect_print_dump_info (REPORT_DR_DETAILS))
580
        {
581
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
582
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
583
          fprintf (vect_dump, " and ");
584
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
585
        }
586
      /* Add to list of ddrs that need to be tested at run-time.  */
587
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
588
    }
589
 
590
  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
591
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
592
    {
593
      int dist = dist_v[loop_depth];
594
 
595
      if (vect_print_dump_info (REPORT_DR_DETAILS))
596
        fprintf (vect_dump, "dependence distance  = %d.", dist);
597
 
598
      /* Same loop iteration.  */
599
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
600
        {
601
          /* Two references with distance zero have the same alignment.  */
602
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
603
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
604
          if (vect_print_dump_info (REPORT_ALIGNMENT))
605
            fprintf (vect_dump, "accesses have the same alignment.");
606
          if (vect_print_dump_info (REPORT_DR_DETAILS))
607
            {
608
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
609
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
610
              fprintf (vect_dump, " and ");
611
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
612
            }
613
 
614
          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We have already checked that one of the data-refs is
             a store.  */
616
          if (DR_IS_READ (dra))
617
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
618
          else
619
            {
620
              if (DR_IS_READ (drb))
621
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
622
            }
623
 
624
          continue;
625
        }
626
 
627
      if (abs (dist) >= vectorization_factor
628
          || (dist > 0 && DDR_REVERSED_P (ddr)))
629
        {
630
          /* Dependence distance does not create dependence, as far as
631
             vectorization is concerned, in this case. If DDR_REVERSED_P the
632
             order of the data-refs in DDR was reversed (to make distance
633
             vector positive), and the actual distance is negative.  */
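          /* For instance (illustrative values): with a vectorization factor
             of 4, a distance of 0 or 4 keeps the accesses in the same vector
             iteration and was handled above; a distance of 5 or more (or a
             negative actual distance) cannot be violated by processing 4
             iterations at a time; a distance of 2, however, would be
             overtaken by the vector step and causes the loop to be rejected
             below.  */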
634
          if (vect_print_dump_info (REPORT_DR_DETAILS))
635
            fprintf (vect_dump, "dependence distance >= VF or negative.");
636
          continue;
637
        }
638
 
639
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
640
        {
641
          fprintf (vect_dump, "not vectorized, possible dependence "
642
                              "between data-refs ");
643
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
644
          fprintf (vect_dump, " and ");
645
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
646
        }
647
 
648
      return true;
649
    }
650
 
651
  return false;
652
}
653
 
654
/* Function vect_analyze_data_ref_dependences.
655
 
656
   Examine all the data references in the loop, and make sure there do not
657
   exist any data dependences between them.  */
658
 
659
bool
660
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
661
                                   bb_vec_info bb_vinfo)
662
{
663
  unsigned int i;
664
  VEC (ddr_p, heap) *ddrs = NULL;
665
  struct data_dependence_relation *ddr;
666
 
667
  if (vect_print_dump_info (REPORT_DETAILS))
668
    fprintf (vect_dump, "=== vect_analyze_dependences ===");
669
 
670
  if (loop_vinfo)
671
    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
672
  else
673
    ddrs = BB_VINFO_DDRS (bb_vinfo);
674
 
675
  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
676
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
677
      return false;
678
 
679
  return true;
680
}
681
 
682
 
683
/* Function vect_compute_data_ref_alignment
684
 
685
   Compute the misalignment of the data reference DR.
686
 
687
   Output:
688
   1. If during the misalignment computation it is found that the data reference
689
      cannot be vectorized then false is returned.
690
   2. DR_MISALIGNMENT (DR) is defined.
691
 
692
   FOR NOW: No analysis is actually performed. Misalignment is calculated
693
   only for trivial cases. TODO.  */
694
 
695
static bool
696
vect_compute_data_ref_alignment (struct data_reference *dr)
697
{
698
  gimple stmt = DR_STMT (dr);
699
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
700
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
701
  struct loop *loop = NULL;
702
  tree ref = DR_REF (dr);
703
  tree vectype;
704
  tree base, base_addr;
705
  bool base_aligned;
706
  tree misalign;
707
  tree aligned_to, alignment;
708
 
709
  if (vect_print_dump_info (REPORT_DETAILS))
710
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");
711
 
712
  if (loop_vinfo)
713
    loop = LOOP_VINFO_LOOP (loop_vinfo);
714
 
715
  /* Initialize misalignment to unknown.  */
716
  SET_DR_MISALIGNMENT (dr, -1);
717
 
718
  misalign = DR_INIT (dr);
719
  aligned_to = DR_ALIGNED_TO (dr);
720
  base_addr = DR_BASE_ADDRESS (dr);
721
  vectype = STMT_VINFO_VECTYPE (stmt_info);
722
 
723
  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is a
     multiple of the vector size.  */
729
  if (loop && nested_in_vect_loop_p (loop, stmt))
730
    {
731
      tree step = DR_STEP (dr);
732
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
733
 
734
      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
735
        {
736
          if (vect_print_dump_info (REPORT_ALIGNMENT))
737
            fprintf (vect_dump, "inner step divides the vector-size.");
738
          misalign = STMT_VINFO_DR_INIT (stmt_info);
739
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
740
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
741
        }
742
      else
743
        {
744
          if (vect_print_dump_info (REPORT_ALIGNMENT))
745
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
746
          misalign = NULL_TREE;
747
        }
748
    }
749
 
750
  base = build_fold_indirect_ref (base_addr);
751
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
752
 
753
  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
754
      || !misalign)
755
    {
756
      if (vect_print_dump_info (REPORT_ALIGNMENT))
757
        {
758
          fprintf (vect_dump, "Unknown alignment for access: ");
759
          print_generic_expr (vect_dump, base, TDF_SLIM);
760
        }
761
      return true;
762
    }
763
 
764
  if ((DECL_P (base)
765
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
766
                                alignment) >= 0)
767
      || (TREE_CODE (base_addr) == SSA_NAME
768
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
769
                                                      TREE_TYPE (base_addr)))),
770
                                   alignment) >= 0))
771
    base_aligned = true;
772
  else
773
    base_aligned = false;
774
 
775
  if (!base_aligned)
776
    {
777
      /* Do not change the alignment of global variables if
778
         flag_section_anchors is enabled.  */
779
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
780
          || (TREE_STATIC (base) && flag_section_anchors))
781
        {
782
          if (vect_print_dump_info (REPORT_DETAILS))
783
            {
784
              fprintf (vect_dump, "can't force alignment of ref: ");
785
              print_generic_expr (vect_dump, ref, TDF_SLIM);
786
            }
787
          return true;
788
        }
789
 
790
      /* Force the alignment of the decl.
791
         NOTE: This is the only change to the code we make during
792
         the analysis phase, before deciding to vectorize the loop.  */
793
      if (vect_print_dump_info (REPORT_DETAILS))
794
        fprintf (vect_dump, "force alignment");
795
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
796
      DECL_USER_ALIGN (base) = 1;
797
    }
798
 
799
  /* At this point we assume that the base is aligned.  */
800
  gcc_assert (base_aligned
801
              || (TREE_CODE (base) == VAR_DECL
802
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
803
 
804
  /* Modulo alignment.  */
805
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
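  /* E.g. (illustrative): with 16-byte vectors and an aligned base, an access
     whose DR_INIT is 20 bytes ends up with a misalignment of 20 mod 16 = 4
     bytes.  */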
806
 
807
  if (!host_integerp (misalign, 1))
808
    {
809
      /* Negative or overflowed misalignment value.  */
810
      if (vect_print_dump_info (REPORT_DETAILS))
811
        fprintf (vect_dump, "unexpected misalign value");
812
      return false;
813
    }
814
 
815
  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
816
 
817
  if (vect_print_dump_info (REPORT_DETAILS))
818
    {
819
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
820
      print_generic_expr (vect_dump, ref, TDF_SLIM);
821
    }
822
 
823
  return true;
824
}
825
 
826
 
827
/* Function vect_compute_data_refs_alignment
828
 
829
   Compute the misalignment of data references in the loop.
830
   Return FALSE if a data reference is found that cannot be vectorized.  */
831
 
832
static bool
833
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
834
                                  bb_vec_info bb_vinfo)
835
{
836
  VEC (data_reference_p, heap) *datarefs;
837
  struct data_reference *dr;
838
  unsigned int i;
839
 
840
  if (loop_vinfo)
841
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
842
  else
843
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
844
 
845
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
846
    if (!vect_compute_data_ref_alignment (dr))
847
      return false;
848
 
849
  return true;
850
}
851
 
852
 
853
/* Function vect_update_misalignment_for_peel
854
 
855
   DR - the data reference whose misalignment is to be adjusted.
856
   DR_PEEL - the data reference whose misalignment is being made
857
             zero in the vector loop by the peel.
858
   NPEEL - the number of iterations in the peel loop if the misalignment
859
           of DR_PEEL is known at compile time.  */
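/* For instance (illustrative numbers): if the peel loop runs NPEEL = 3 scalar
   iterations, a data-ref with 4-byte elements that entered the loop
   misaligned by 4 bytes ends up with misalignment (4 + 3*4) mod 16 = 0 for
   16-byte vectors, while a data-ref whose misalignment is unknown simply
   stays unknown (-1).  */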
860
 
861
static void
862
vect_update_misalignment_for_peel (struct data_reference *dr,
863
                                   struct data_reference *dr_peel, int npeel)
864
{
865
  unsigned int i;
866
  VEC(dr_p,heap) *same_align_drs;
867
  struct data_reference *current_dr;
868
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
869
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
870
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
871
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
872
 
873
 /* For interleaved data accesses the step in the loop must be multiplied by
874
     the size of the interleaving group.  */
875
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
876
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
877
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
878
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
879
 
880
  /* It can be assumed that the data refs with the same alignment as dr_peel
881
     are aligned in the vector loop.  */
882
  same_align_drs
883
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
884
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
885
    {
886
      if (current_dr != dr)
887
        continue;
888
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
889
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
890
      SET_DR_MISALIGNMENT (dr, 0);
891
      return;
892
    }
893
 
894
  if (known_alignment_for_access_p (dr)
895
      && known_alignment_for_access_p (dr_peel))
896
    {
897
      int misal = DR_MISALIGNMENT (dr);
898
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
899
      misal += npeel * dr_size;
900
      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
901
      SET_DR_MISALIGNMENT (dr, misal);
902
      return;
903
    }
904
 
905
  if (vect_print_dump_info (REPORT_DETAILS))
906
    fprintf (vect_dump, "Setting misalignment to -1.");
907
  SET_DR_MISALIGNMENT (dr, -1);
908
}
909
 
910
 
911
/* Function vect_verify_datarefs_alignment
912
 
913
   Return TRUE if all data references in the loop can be
914
   handled with respect to alignment.  */
915
 
916
bool
917
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
918
{
919
  VEC (data_reference_p, heap) *datarefs;
920
  struct data_reference *dr;
921
  enum dr_alignment_support supportable_dr_alignment;
922
  unsigned int i;
923
 
924
  if (loop_vinfo)
925
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
926
  else
927
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
928
 
929
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
930
    {
931
      gimple stmt = DR_STMT (dr);
932
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
933
 
934
      /* For interleaving, only the alignment of the first access matters.  */
935
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
936
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
937
        continue;
938
 
939
      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
940
      if (!supportable_dr_alignment)
941
        {
942
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
943
            {
944
              if (DR_IS_READ (dr))
945
                fprintf (vect_dump,
946
                         "not vectorized: unsupported unaligned load.");
947
              else
948
                fprintf (vect_dump,
949
                         "not vectorized: unsupported unaligned store.");
950
            }
951
          return false;
952
        }
953
      if (supportable_dr_alignment != dr_aligned
954
          && vect_print_dump_info (REPORT_ALIGNMENT))
955
        fprintf (vect_dump, "Vectorizing an unaligned access.");
956
    }
957
  return true;
958
}
959
 
960
 
961
/* Function vector_alignment_reachable_p
962
 
963
   Return true if vector alignment for DR is reachable by peeling
964
   a few loop iterations.  Return false otherwise.  */
965
 
966
static bool
967
vector_alignment_reachable_p (struct data_reference *dr)
968
{
969
  gimple stmt = DR_STMT (dr);
970
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
971
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
972
 
973
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
974
    {
975
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop (VF - misalignment, counted in elements) is a
         multiple of the number of interleaved accesses.  */
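      /* E.g. (illustrative): with 4 elements per vector, a misalignment of 2
         elements and an interleaving group of size 2, the prolog loop would
         run 4 - 2 = 2 iterations, a multiple of the group size, so peeling
         remains possible; with a group of size 3 it would not.  */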
978
      int elem_size, mis_in_elements;
979
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);
980
 
981
      /* FORNOW: handle only known alignment.  */
982
      if (!known_alignment_for_access_p (dr))
983
        return false;
984
 
985
      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
986
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
987
 
988
      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
989
        return false;
990
    }
991
 
992
  /* If misalignment is known at the compile time then allow peeling
993
     only if natural alignment is reachable through peeling.  */
994
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
995
    {
996
      HOST_WIDE_INT elmsize =
997
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
998
      if (vect_print_dump_info (REPORT_DETAILS))
999
        {
1000
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1001
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1002
        }
1003
      if (DR_MISALIGNMENT (dr) % elmsize)
1004
        {
1005
          if (vect_print_dump_info (REPORT_DETAILS))
1006
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
1007
          return false;
1008
        }
1009
    }
1010
 
1011
  if (!known_alignment_for_access_p (dr))
1012
    {
1013
      tree type = (TREE_TYPE (DR_REF (dr)));
1014
      tree ba = DR_BASE_OBJECT (dr);
1015
      bool is_packed = false;
1016
 
1017
      if (ba)
1018
        is_packed = contains_packed_reference (ba);
1019
 
1020
      if (vect_print_dump_info (REPORT_DETAILS))
1021
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1022
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1023
        return true;
1024
      else
1025
        return false;
1026
    }
1027
 
1028
  return true;
1029
}
1030
 
1031
/* Function vect_enhance_data_refs_alignment
1032
 
1033
   This pass will use loop versioning and loop peeling in order to enhance
1034
   the alignment of data references in the loop.
1035
 
1036
   FOR NOW: we assume that whatever versioning/peeling takes place, only the
1037
   original loop is to be vectorized; Any other loops that are created by
1038
   the transformations performed in this pass - are not supposed to be
1039
   vectorized. This restriction will be relaxed.
1040
 
1041
   This pass will require a cost model to guide it whether to apply peeling
1042
   or versioning or a combination of the two. For example, the scheme that
1043
   intel uses when given a loop with several memory accesses, is as follows:
1044
   choose one memory access ('p') which alignment you want to force by doing
1045
   peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1046
   other accesses are not necessarily aligned, or (2) use loop versioning to
1047
   generate one loop in which all accesses are aligned, and another loop in
1048
   which only 'p' is necessarily aligned.
1049
 
1050
   ("Automatic Intra-Register Vectorization for the Intel Architecture",
1051
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1052
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1053
 
1054
   Devising a cost model is the most critical aspect of this work. It will
1055
   guide us on which access to peel for, whether to use loop versioning, how
1056
   many versions to create, etc. The cost model will probably consist of
1057
   generic considerations as well as target specific considerations (on
1058
   powerpc for example, misaligned stores are more painful than misaligned
1059
   loads).
1060
 
1061
   Here are the general steps involved in alignment enhancements:
1062
 
1063
     -- original loop, before alignment analysis:
1064
        for (i=0; i<N; i++){
1065
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
1066
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1067
        }
1068
 
1069
     -- After vect_compute_data_refs_alignment:
1070
        for (i=0; i<N; i++){
1071
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
1072
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1073
        }
1074
 
1075
     -- Possibility 1: we do loop versioning:
1076
     if (p is aligned) {
1077
        for (i=0; i<N; i++){    # loop 1A
1078
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
1079
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1080
        }
1081
     }
1082
     else {
1083
        for (i=0; i<N; i++){    # loop 1B
1084
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
1085
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1086
        }
1087
     }
1088
 
1089
     -- Possibility 2: we do loop peeling:
1090
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1091
        x = q[i];
1092
        p[i] = y;
1093
     }
1094
     for (i = 3; i < N; i++){   # loop 2A
1095
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
1096
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
1097
     }
1098
 
1099
     -- Possibility 3: combination of loop peeling and versioning:
1100
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1101
        x = q[i];
1102
        p[i] = y;
1103
     }
1104
     if (p is aligned) {
1105
        for (i = 3; i<N; i++){  # loop 3A
1106
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
1107
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1108
        }
1109
     }
1110
     else {
1111
        for (i = 3; i<N; i++){  # loop 3B
1112
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
1113
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1114
        }
1115
     }
1116
 
1117
     These loops are later passed to loop_transform to be vectorized. The
1118
     vectorizer will use the alignment information to guide the transformation
1119
     (whether to generate regular loads/stores, or with special handling for
1120
     misalignment).  */
1121
 
1122
bool
1123
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1124
{
1125
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1126
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1127
  enum dr_alignment_support supportable_dr_alignment;
1128
  struct data_reference *dr0 = NULL;
1129
  struct data_reference *dr;
1130
  unsigned int i;
1131
  bool do_peeling = false;
1132
  bool do_versioning = false;
1133
  bool stat;
1134
  gimple stmt;
1135
  stmt_vec_info stmt_info;
1136
  int vect_versioning_for_alias_required;
1137
 
1138
  if (vect_print_dump_info (REPORT_DETAILS))
1139
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1140
 
1141
  /* While cost model enhancements are expected in the future, the high level
1142
     view of the code at this time is as follows:
1143
 
1144
     A) If there is a misaligned access then see if peeling to align
1145
        this access can make all data references satisfy
1146
        vect_supportable_dr_alignment.  If so, update data structures
1147
        as needed and return true.
1148
 
1149
     B) If peeling wasn't possible and there is a data reference with an
1150
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
1151
        then see if loop versioning checks can be used to make all data
1152
        references satisfy vect_supportable_dr_alignment.  If so, update
1153
        data structures as needed and return true.
1154
 
1155
     C) If neither peeling nor versioning were successful then return false if
1156
        any data reference does not satisfy vect_supportable_dr_alignment.
1157
 
1158
     D) Return true (all data references satisfy vect_supportable_dr_alignment).
1159
 
1160
     Note, Possibility 3 above (which is peeling and versioning together) is not
1161
     being done at this time.  */
1162
 
1163
  /* (1) Peeling to force alignment.  */
1164
 
1165
  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1166
     Considerations:
1167
     + How many accesses will become aligned due to the peeling
1168
     - How many accesses will become unaligned due to the peeling,
1169
       and the cost of misaligned accesses.
1170
     - The cost of peeling (the extra runtime checks, the increase
1171
       in code size).
1172
 
1173
     The scheme we use FORNOW: peel to force the alignment of the first
1174
     unsupported misaligned access in the loop.
1175
 
1176
     TODO: Use a cost model.  */
1177
 
1178
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1179
    {
1180
      stmt = DR_STMT (dr);
1181
      stmt_info = vinfo_for_stmt (stmt);
1182
 
1183
      /* For interleaving, only the alignment of the first access
1184
         matters.  */
1185
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1186
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1187
        continue;
1188
 
1189
      if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1190
        {
1191
          do_peeling = vector_alignment_reachable_p (dr);
1192
          if (do_peeling)
1193
            dr0 = dr;
1194
          if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1195
            fprintf (vect_dump, "vector alignment may not be reachable");
1196
          break;
1197
        }
1198
    }
1199
 
1200
  vect_versioning_for_alias_required
1201
    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1202
 
1203
  /* Temporarily, if versioning for alias is required, we disable peeling
1204
     until we support peeling and versioning.  Often peeling for alignment
1205
     will require peeling for loop-bound, which in turn requires that we
1206
     know how to adjust the loop ivs after the loop.  */
1207
  if (vect_versioning_for_alias_required
1208
      || !vect_can_advance_ivs_p (loop_vinfo)
1209
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1210
    do_peeling = false;
1211
 
1212
  if (do_peeling)
1213
    {
1214
      int mis;
1215
      int npeel = 0;
1216
      gimple stmt = DR_STMT (dr0);
1217
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1218
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1219
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1220
 
1221
      if (known_alignment_for_access_p (dr0))
1222
        {
1223
          /* Since it's known at compile time, compute the number of iterations
1224
             in the peeled loop (the peeling factor) for use in updating
1225
             DR_MISALIGNMENT values.  The peeling factor is the vectorization
1226
             factor minus the misalignment as an element count.  */
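          /* E.g. (illustrative): a misalignment of 8 bytes with 4-byte
             elements and 4 elements per vector gives mis = 2 and
             npeel = 4 - 2 = 2 peeled scalar iterations.  */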
1227
          mis = DR_MISALIGNMENT (dr0);
1228
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1229
          npeel = nelements - mis;
1230
 
1231
          /* For interleaved data access every iteration accesses all the
1232
             members of the group, therefore we divide the number of iterations
1233
             by the group size.  */
1234
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1235
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1236
            npeel /= DR_GROUP_SIZE (stmt_info);
1237
 
1238
          if (vect_print_dump_info (REPORT_DETAILS))
1239
            fprintf (vect_dump, "Try peeling by %d", npeel);
1240
        }
1241
 
1242
      /* Ensure that all data refs can be vectorized after the peel.  */
1243
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1244
        {
1245
          int save_misalignment;
1246
 
1247
          if (dr == dr0)
1248
            continue;
1249
 
1250
          stmt = DR_STMT (dr);
1251
          stmt_info = vinfo_for_stmt (stmt);
1252
          /* For interleaving, only the alignment of the first access
1253
            matters.  */
1254
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1255
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1256
            continue;
1257
 
1258
          save_misalignment = DR_MISALIGNMENT (dr);
1259
          vect_update_misalignment_for_peel (dr, dr0, npeel);
1260
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1261
          SET_DR_MISALIGNMENT (dr, save_misalignment);
1262
 
1263
          if (!supportable_dr_alignment)
1264
            {
1265
              do_peeling = false;
1266
              break;
1267
            }
1268
        }
1269
 
1270
      if (do_peeling)
1271
        {
1272
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1273
             If the misalignment of DR_i is identical to that of dr0 then set
1274
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
1275
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1276
             by the peeling factor times the element size of DR_i (MOD the
1277
             vectorization factor times the size).  Otherwise, the
1278
             misalignment of DR_i must be set to unknown.  */
1279
          for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1280
            if (dr != dr0)
1281
              vect_update_misalignment_for_peel (dr, dr0, npeel);
1282
 
1283
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1284
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1285
          SET_DR_MISALIGNMENT (dr0, 0);
1286
          if (vect_print_dump_info (REPORT_ALIGNMENT))
1287
            fprintf (vect_dump, "Alignment of access forced using peeling.");
1288
 
1289
          if (vect_print_dump_info (REPORT_DETAILS))
1290
            fprintf (vect_dump, "Peeling for alignment will be applied.");
1291
 
1292
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1293
          gcc_assert (stat);
1294
          return stat;
1295
        }
1296
    }
1297
 
1298
 
1299
  /* (2) Versioning to force alignment.  */
1300
 
1301
  /* Try versioning if:
1302
     1) flag_tree_vect_loop_version is TRUE
1303
     2) optimize loop for speed
1304
     3) there is at least one unsupported misaligned data ref with an unknown
1305
        misalignment, and
1306
     4) all misaligned data refs with a known misalignment are supported, and
1307
     5) the number of runtime alignment checks is within reason.  */
1308
 
1309
  do_versioning =
1310
        flag_tree_vect_loop_version
1311
        && optimize_loop_nest_for_speed_p (loop)
1312
        && (!loop->inner); /* FORNOW */
1313
 
1314
  if (do_versioning)
1315
    {
1316
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1317
        {
1318
          stmt = DR_STMT (dr);
1319
          stmt_info = vinfo_for_stmt (stmt);
1320
 
1321
          /* For interleaving, only the alignment of the first access
1322
             matters.  */
1323
          if (aligned_access_p (dr)
1324
              || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1325
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1326
            continue;
1327
 
1328
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1329
 
1330
          if (!supportable_dr_alignment)
1331
            {
1332
              gimple stmt;
1333
              int mask;
1334
              tree vectype;
1335
 
1336
              if (known_alignment_for_access_p (dr)
1337
                  || VEC_length (gimple,
1338
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1339
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1340
                {
1341
                  do_versioning = false;
1342
                  break;
1343
                }
1344
 
1345
              stmt = DR_STMT (dr);
1346
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1347
              gcc_assert (vectype);
1348
 
1349
              /* The rightmost bits of an aligned address must be zeros.
1350
                 Construct the mask needed for this test.  For example,
1351
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1352
                 mask must be 15 = 0xf. */
1353
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1354
 
1355
              /* FORNOW: use the same mask to test all potentially unaligned
1356
                 references in the loop.  The vectorizer currently supports
1357
                 a single vector size, see the reference to
1358
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1359
                 vectorization factor is computed.  */
1360
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1361
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1362
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1363
              VEC_safe_push (gimple, heap,
1364
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1365
                             DR_STMT (dr));
1366
            }
1367
        }
1368
 
1369
      /* Versioning requires at least one misaligned data reference.  */
1370
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1371
        do_versioning = false;
1372
      else if (!do_versioning)
1373
        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1374
    }
1375
 
1376
  if (do_versioning)
1377
    {
1378
      VEC(gimple,heap) *may_misalign_stmts
1379
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1380
      gimple stmt;
1381
 
1382
      /* It can now be assumed that the data references in the statements
1383
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1384
         of the loop being vectorized.  */
1385
      for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1386
        {
1387
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1388
          dr = STMT_VINFO_DATA_REF (stmt_info);
1389
          SET_DR_MISALIGNMENT (dr, 0);
1390
          if (vect_print_dump_info (REPORT_ALIGNMENT))
1391
            fprintf (vect_dump, "Alignment of access forced using versioning.");
1392
        }
1393
 
1394
      if (vect_print_dump_info (REPORT_DETAILS))
1395
        fprintf (vect_dump, "Versioning for alignment will be applied.");
1396
 
1397
      /* Peeling and versioning can't be done together at this time.  */
1398
      gcc_assert (! (do_peeling && do_versioning));
1399
 
1400
      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1401
      gcc_assert (stat);
1402
      return stat;
1403
    }
1404
 
1405
  /* This point is reached if neither peeling nor versioning is being done.  */
1406
  gcc_assert (! (do_peeling || do_versioning));
1407
 
1408
  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1409
  return stat;
1410
}
1411
 
1412
 
1413
/* Function vect_analyze_data_refs_alignment
1414
 
1415
   Analyze the alignment of the data-references in the loop.
1416
   Return FALSE if a data reference is found that cannot be vectorized.  */
1417
 
1418
bool
1419
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1420
                                  bb_vec_info bb_vinfo)
1421
{
1422
  if (vect_print_dump_info (REPORT_DETAILS))
1423
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1424
 
1425
  if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1426
    {
1427
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1428
        fprintf (vect_dump,
1429
                 "not vectorized: can't calculate alignment for data ref.");
1430
      return false;
1431
    }
1432
 
1433
  return true;
1434
}
1435
 
1436
 
1437
/* Analyze groups of strided accesses: check that DR belongs to a group of
1438
   strided accesses of legal size, step, etc. Detect gaps, single element
1439
   interleaving, and other special cases. Set strided access info.
1440
   Collect groups of strided stores for further use in SLP analysis.  */
1441
 
1442
static bool
1443
vect_analyze_group_access (struct data_reference *dr)
1444
{
1445
  tree step = DR_STEP (dr);
1446
  tree scalar_type = TREE_TYPE (DR_REF (dr));
1447
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1448
  gimple stmt = DR_STMT (dr);
1449
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1450
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1451
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1452
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1453
  HOST_WIDE_INT stride;
1454
  bool slp_impossible = false;
1455
 
1456
  /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1457
     interleaving group (including gaps).  */
1458
  stride = dr_step / type_size;
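  /* Worked example (illustrative, not from the original comments): for an
     access such as a[3*i] on 4-byte ints, DR_STEP is 12 bytes and
     TYPE_SIZE_UNIT is 4, so STRIDE is 3 - the interleaving group spans
     three elements per iteration, gaps included.  */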
1459
 
1460
  /* A non-consecutive access is possible only if it is part of interleaving.  */
1461
  if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1462
    {
1463
      /* Check if this DR is a part of interleaving, and is a single
1464
         element of the group that is accessed in the loop.  */
1465
 
1466
      /* Gaps are supported only for loads. STEP must be a multiple of the type
1467
         size.  The size of the group must be a power of 2.  */
1468
      if (DR_IS_READ (dr)
1469
          && (dr_step % type_size) == 0
1470
          && stride > 0
1471
          && exact_log2 (stride) != -1)
1472
        {
1473
          DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1474
          DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1475
          if (vect_print_dump_info (REPORT_DR_DETAILS))
1476
            {
1477
              fprintf (vect_dump, "Detected single element interleaving ");
1478
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1479
              fprintf (vect_dump, " step ");
1480
              print_generic_expr (vect_dump, step, TDF_SLIM);
1481
            }
1482
          return true;
1483
        }
1484
      if (vect_print_dump_info (REPORT_DETAILS))
1485
        fprintf (vect_dump, "not consecutive access");
1486
      return false;
1487
    }
1488
 
1489
  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1490
    {
1491
      /* First stmt in the interleaving chain. Check the chain.  */
1492
      gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1493
      struct data_reference *data_ref = dr;
1494
      unsigned int count = 1;
1495
      tree next_step;
1496
      tree prev_init = DR_INIT (data_ref);
1497
      gimple prev = stmt;
1498
      HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1499
 
1500
      while (next)
1501
        {
1502
          /* Skip same data-refs.  In case two or more stmts share a data-ref
1503
             (supported only for loads), we vectorize only the first stmt, and
1504
             the rest get their vectorized loads from the first one.  */
1505
          if (!tree_int_cst_compare (DR_INIT (data_ref),
1506
                                     DR_INIT (STMT_VINFO_DATA_REF (
1507
                                                   vinfo_for_stmt (next)))))
1508
            {
1509
              if (!DR_IS_READ (data_ref))
1510
                {
1511
                  if (vect_print_dump_info (REPORT_DETAILS))
1512
                    fprintf (vect_dump, "Two store stmts share the same dr.");
1513
                  return false;
1514
                }
1515
 
1516
              /* Check that there are no load-store dependencies for these loads
1517
                 to prevent a case of load-store-load to the same location.  */
1518
              if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1519
                  || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1520
                {
1521
                  if (vect_print_dump_info (REPORT_DETAILS))
1522
                    fprintf (vect_dump,
1523
                             "READ_WRITE dependence in interleaving.");
1524
                  return false;
1525
                }
1526
 
1527
              /* For loads, use the same data-ref load.  */
1528
              DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1529
 
1530
              prev = next;
1531
              next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1532
              continue;
1533
            }
1534
          prev = next;
1535
 
1536
          /* Check that all the accesses have the same STEP.  */
1537
          next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1538
          if (tree_int_cst_compare (step, next_step))
1539
            {
1540
              if (vect_print_dump_info (REPORT_DETAILS))
1541
                fprintf (vect_dump, "not consecutive access in interleaving");
1542
              return false;
1543
            }
1544
 
1545
          data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1546
          /* Check that the distance between two accesses is equal to the type
1547
             size. Otherwise, we have gaps.  */
1548
          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1549
                  - TREE_INT_CST_LOW (prev_init)) / type_size;
1550
          if (diff != 1)
1551
            {
1552
              /* FORNOW: SLP of accesses with gaps is not supported.  */
1553
              slp_impossible = true;
1554
              if (!DR_IS_READ (data_ref))
1555
                {
1556
                  if (vect_print_dump_info (REPORT_DETAILS))
1557
                    fprintf (vect_dump, "interleaved store with gaps");
1558
                  return false;
1559
                }
1560
 
1561
              gaps += diff - 1;
1562
            }
1563
 
1564
          /* Store the gap from the previous member of the group. If there is no
1565
             gap in the access, DR_GROUP_GAP is always 1.  */
1566
          DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1567
 
1568
          prev_init = DR_INIT (data_ref);
1569
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1570
          /* Count the number of data-refs in the chain.  */
1571
          count++;
1572
        }
1573
 
1574
      /* COUNT is the number of accesses found; we multiply it by the size of
1575
         the type to get COUNT_IN_BYTES.  */
1576
      count_in_bytes = type_size * count;
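      /* Illustrative example (assumed, not from the original comments): a
         load group accessing a[4*i], a[4*i+1] and a[4*i+3] on 4-byte ints
         has COUNT = 3, GAPS = 1 and DR_STEP = 16, so COUNT_IN_BYTES is 12;
         the check below (16 < 12 + 1*4 is false) passes, and further down
         DR_GROUP_GAP of the first stmt is set to STRIDE - COUNT = 1.  */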
1577
 
1578
      /* Check that the size of the interleaving (including gaps) is not
1579
         greater than STEP.  */
1580
      if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1581
        {
1582
          if (vect_print_dump_info (REPORT_DETAILS))
1583
            {
1584
              fprintf (vect_dump, "interleaving size is greater than step for ");
1585
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1586
            }
1587
          return false;
1588
        }
1589
 
1590
      /* Check that the size of the interleaving is equal to STEP for stores,
1591
         i.e., that there are no gaps.  */
1592
      if (dr_step && dr_step != count_in_bytes)
1593
        {
1594
          if (DR_IS_READ (dr))
1595
            {
1596
              slp_impossible = true;
1597
              /* There is a gap after the last load in the group.  This gap is the
1598
                 difference between the stride and the number of elements. When
1599
                 there is no gap, this difference should be 0.  */
1600
              DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1601
            }
1602
          else
1603
            {
1604
              if (vect_print_dump_info (REPORT_DETAILS))
1605
                fprintf (vect_dump, "interleaved store with gaps");
1606
              return false;
1607
            }
1608
        }
1609
 
1610
      /* Check that STEP is a multiple of type size.  */
1611
      if (dr_step && (dr_step % type_size) != 0)
1612
        {
1613
          if (vect_print_dump_info (REPORT_DETAILS))
1614
            {
1615
              fprintf (vect_dump, "step is not a multiple of type size: step ");
1616
              print_generic_expr (vect_dump, step, TDF_SLIM);
1617
              fprintf (vect_dump, " size ");
1618
              print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1619
                                  TDF_SLIM);
1620
            }
1621
          return false;
1622
        }
1623
 
1624
      /* FORNOW: we handle only interleaving that is a power of 2.
1625
         We don't fail here if it may still be possible to vectorize the
1626
         group using SLP. If not, the size of the group will be checked in
1627
         vect_analyze_operations, and the vectorization will fail.  */
1628
      if (exact_log2 (stride) == -1)
1629
        {
1630
          if (vect_print_dump_info (REPORT_DETAILS))
1631
            fprintf (vect_dump, "interleaving is not a power of 2");
1632
 
1633
          if (slp_impossible)
1634
            return false;
1635
        }
1636
 
1637
      if (stride == 0)
1638
        stride = count;
1639
 
1640
      DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1641
      if (vect_print_dump_info (REPORT_DETAILS))
1642
        fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1643
 
1644
      /* SLP: create an SLP data structure for every interleaving group of
1645
         stores for further analysis in vect_analyze_slp.  */
1646
      if (!DR_IS_READ (dr) && !slp_impossible)
1647
        {
1648
          if (loop_vinfo)
1649
            VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1650
                           stmt);
1651
          if (bb_vinfo)
1652
            VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1653
                           stmt);
1654
        }
1655
    }
1656
 
1657
  return true;
1658
}
1659
 
1660
 
1661
/* Analyze the access pattern of the data-reference DR.
1662
   In case of non-consecutive accesses call vect_analyze_group_access() to
1663
   analyze groups of strided accesses.  */
1664
 
1665
static bool
1666
vect_analyze_data_ref_access (struct data_reference *dr)
1667
{
1668
  tree step = DR_STEP (dr);
1669
  tree scalar_type = TREE_TYPE (DR_REF (dr));
1670
  gimple stmt = DR_STMT (dr);
1671
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1672
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1673
  struct loop *loop = NULL;
1674
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1675
 
1676
  if (loop_vinfo)
1677
    loop = LOOP_VINFO_LOOP (loop_vinfo);
1678
 
1679
  if (loop_vinfo && !step)
1680
    {
1681
      if (vect_print_dump_info (REPORT_DETAILS))
1682
        fprintf (vect_dump, "bad data-ref access in loop");
1683
      return false;
1684
    }
1685
 
1686
  /* Don't allow invariant accesses in loops.  */
1687
  if (loop_vinfo && dr_step == 0)
1688
    return false;
1689
 
1690
  if (loop && nested_in_vect_loop_p (loop, stmt))
1691
    {
1692
      /* Interleaved accesses are not yet supported within outer-loop
1693
        vectorization for references in the inner-loop.  */
1694
      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1695
 
1696
      /* For the rest of the analysis we use the outer-loop step.  */
1697
      step = STMT_VINFO_DR_STEP (stmt_info);
1698
      dr_step = TREE_INT_CST_LOW (step);
1699
 
1700
      if (dr_step == 0)
1701
        {
1702
          if (vect_print_dump_info (REPORT_ALIGNMENT))
1703
            fprintf (vect_dump, "zero step in outer loop.");
1704
          if (DR_IS_READ (dr))
1705
            return true;
1706
          else
1707
            return false;
1708
        }
1709
    }
1710
 
1711
  /* Consecutive?  */
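  /* For example (illustrative): a plain a[i] access on 4-byte ints has
     DR_STEP equal to TYPE_SIZE_UNIT (4 bytes), so the comparison below
     returns 0 and the access is treated as consecutive rather than as part
     of an interleaving group.  */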
1712
  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1713
    {
1714
      /* Mark that it is not interleaving.  */
1715
      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1716
      return true;
1717
    }
1718
 
1719
  if (loop && nested_in_vect_loop_p (loop, stmt))
1720
    {
1721
      if (vect_print_dump_info (REPORT_ALIGNMENT))
1722
        fprintf (vect_dump, "strided access in outer loop.");
1723
      return false;
1724
    }
1725
 
1726
  /* Not a consecutive access - check if it is part of an interleaving group.  */
1727
  return vect_analyze_group_access (dr);
1728
}
1729
 
1730
 
1731
/* Function vect_analyze_data_ref_accesses.
1732
 
1733
   Analyze the access pattern of all the data references in the loop.
1734
 
1735
   FORNOW: the only access pattern that is considered vectorizable is a
1736
           simple step 1 (consecutive) access.
1737
 
1738
   FORNOW: handle only arrays and pointer accesses.  */
1739
 
1740
bool
1741
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1742
{
1743
  unsigned int i;
1744
  VEC (data_reference_p, heap) *datarefs;
1745
  struct data_reference *dr;
1746
 
1747
  if (vect_print_dump_info (REPORT_DETAILS))
1748
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1749
 
1750
  if (loop_vinfo)
1751
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1752
  else
1753
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1754
 
1755
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1756
    if (!vect_analyze_data_ref_access (dr))
1757
      {
1758
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1759
          fprintf (vect_dump, "not vectorized: complicated access pattern.");
1760
        return false;
1761
      }
1762
 
1763
  return true;
1764
}
1765
 
1766
/* Function vect_prune_runtime_alias_test_list.
1767
 
1768
   Prune a list of ddrs to be tested at run-time by versioning for alias.
1769
   Return FALSE if the resulting list of ddrs is longer than allowed by
1770
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
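/* For instance (illustrative, not part of the original comment): if two
   recorded ddrs describe the same pair of address ranges - say two
   statements that both reference a[i] and b[i] - a single runtime overlap
   test suffices, so the duplicate ddr is removed below.  */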
1771
 
1772
bool
1773
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1774
{
1775
  VEC (ddr_p, heap) * ddrs =
1776
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1777
  unsigned i, j;
1778
 
1779
  if (vect_print_dump_info (REPORT_DETAILS))
1780
    fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1781
 
1782
  for (i = 0; i < VEC_length (ddr_p, ddrs); )
1783
    {
1784
      bool found;
1785
      ddr_p ddr_i;
1786
 
1787
      ddr_i = VEC_index (ddr_p, ddrs, i);
1788
      found = false;
1789
 
1790
      for (j = 0; j < i; j++)
1791
        {
1792
          ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1793
 
1794
          if (vect_vfa_range_equal (ddr_i, ddr_j))
1795
            {
1796
              if (vect_print_dump_info (REPORT_DR_DETAILS))
1797
                {
1798
                  fprintf (vect_dump, "found equal ranges ");
1799
                  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1800
                  fprintf (vect_dump, ", ");
1801
                  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1802
                  fprintf (vect_dump, " and ");
1803
                  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1804
                  fprintf (vect_dump, ", ");
1805
                  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1806
                }
1807
              found = true;
1808
              break;
1809
            }
1810
        }
1811
 
1812
      if (found)
1813
      {
1814
        VEC_ordered_remove (ddr_p, ddrs, i);
1815
        continue;
1816
      }
1817
      i++;
1818
    }
1819
 
1820
  if (VEC_length (ddr_p, ddrs) >
1821
       (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1822
    {
1823
      if (vect_print_dump_info (REPORT_DR_DETAILS))
1824
        {
1825
          fprintf (vect_dump,
1826
                   "disable versioning for alias - max number of generated "
1827
                   "checks exceeded.");
1828
        }
1829
 
1830
      VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1831
 
1832
      return false;
1833
    }
1834
 
1835
  return true;
1836
}
1837
 
1838
 
1839
/* Function vect_analyze_data_refs.
1840
 
1841
  Find all the data references in the loop or basic block.
1842
 
1843
   The general structure of the analysis of data refs in the vectorizer is as
1844
   follows:
1845
   1- vect_analyze_data_refs(loop/bb): call
1846
      compute_data_dependences_for_loop/bb to find and analyze all data-refs
1847
      in the loop/bb and their dependences.
1848
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
1849
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1850
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1851
 
1852
*/
1853
 
1854
bool
1855
vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1856
{
1857
  struct loop *loop = NULL;
1858
  basic_block bb = NULL;
1859
  unsigned int i;
1860
  VEC (data_reference_p, heap) *datarefs;
1861
  struct data_reference *dr;
1862
  tree scalar_type;
1863
  bool res;
1864
 
1865
  if (vect_print_dump_info (REPORT_DETAILS))
1866
    fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1867
 
1868
  if (loop_vinfo)
1869
    {
1870
      loop = LOOP_VINFO_LOOP (loop_vinfo);
1871
      res = compute_data_dependences_for_loop
1872
        (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
1873
         &LOOP_VINFO_DDRS (loop_vinfo));
1874
 
1875
      if (!res)
1876
        {
1877
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1878
            fprintf (vect_dump, "not vectorized: loop contains function calls"
1879
                     " or data references that cannot be analyzed");
1880
          return false;
1881
        }
1882
 
1883
      datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1884
    }
1885
  else
1886
    {
1887
      bb = BB_VINFO_BB (bb_vinfo);
1888
      res = compute_data_dependences_for_bb (bb, true,
1889
                                             &BB_VINFO_DATAREFS (bb_vinfo),
1890
                                             &BB_VINFO_DDRS (bb_vinfo));
1891
      if (!res)
1892
        {
1893
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1894
            fprintf (vect_dump, "not vectorized: basic block contains function"
1895
                     " calls or data references that cannot be analyzed");
1896
          return false;
1897
        }
1898
 
1899
      datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1900
    }
1901
 
1902
  /* Go through the data-refs and check that the analysis succeeded.  Update the pointer
1903
     from stmt_vec_info struct to DR and vectype.  */
1904
 
1905
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1906
    {
1907
      gimple stmt;
1908
      stmt_vec_info stmt_info;
1909
      tree base, offset, init;
1910
 
1911
      if (!dr || !DR_REF (dr))
1912
        {
1913
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1914
            fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1915
          return false;
1916
        }
1917
 
1918
      stmt = DR_STMT (dr);
1919
      stmt_info = vinfo_for_stmt (stmt);
1920
 
1921
      /* Check that analysis of the data-ref succeeded.  */
1922
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1923
          || !DR_STEP (dr))
1924
        {
1925
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1926
            {
1927
              fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1928
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1929
            }
1930
          return false;
1931
        }
1932
 
1933
      if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1934
        {
1935
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1936
            fprintf (vect_dump, "not vectorized: base addr of dr is a "
1937
                     "constant");
1938
          return false;
1939
        }
1940
 
1941
      base = unshare_expr (DR_BASE_ADDRESS (dr));
1942
      offset = unshare_expr (DR_OFFSET (dr));
1943
      init = unshare_expr (DR_INIT (dr));
1944
 
1945
      /* Update DR field in stmt_vec_info struct.  */
1946
 
1947
      /* If the dataref is in an inner-loop of the loop that is considered
1948
         for vectorization, we also want to analyze the access relative to
1949
         the outer-loop (DR contains information only relative to the
1950
         inner-most enclosing loop).  We do that by building a reference to the
1951
         first location accessed by the inner-loop, and analyze it relative to
1952
         the outer-loop.  */
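      /* For instance (illustrative): for the nested access in[i+j] from the
         example in vect_create_addr_base_for_vector_ref below, the access
         analyzed relative to the outer i-loop has base &in, initial offset 0
         and a step of 2 bytes per outer-loop iteration.  */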
1953
      if (loop && nested_in_vect_loop_p (loop, stmt))
1954
        {
1955
          tree outer_step, outer_base, outer_init;
1956
          HOST_WIDE_INT pbitsize, pbitpos;
1957
          tree poffset;
1958
          enum machine_mode pmode;
1959
          int punsignedp, pvolatilep;
1960
          affine_iv base_iv, offset_iv;
1961
          tree dinit;
1962
 
1963
          /* Build a reference to the first location accessed by the
1964
             inner-loop: *(BASE+INIT). (The first location is actually
1965
             BASE+INIT+OFFSET, but we add OFFSET separately later).  */
1966
          tree inner_base = build_fold_indirect_ref
1967
                                (fold_build2 (POINTER_PLUS_EXPR,
1968
                                              TREE_TYPE (base), base,
1969
                                              fold_convert (sizetype, init)));
1970
 
1971
          if (vect_print_dump_info (REPORT_DETAILS))
1972
            {
1973
              fprintf (vect_dump, "analyze in outer-loop: ");
1974
              print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1975
            }
1976
 
1977
          outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
1978
                          &poffset, &pmode, &punsignedp, &pvolatilep, false);
1979
          gcc_assert (outer_base != NULL_TREE);
1980
 
1981
          if (pbitpos % BITS_PER_UNIT != 0)
1982
            {
1983
              if (vect_print_dump_info (REPORT_DETAILS))
1984
                fprintf (vect_dump, "failed: bit offset alignment.\n");
1985
              return false;
1986
            }
1987
 
1988
          outer_base = build_fold_addr_expr (outer_base);
1989
          if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
1990
                          &base_iv, false))
1991
            {
1992
              if (vect_print_dump_info (REPORT_DETAILS))
1993
                fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1994
              return false;
1995
            }
1996
 
1997
          if (offset)
1998
            {
1999
              if (poffset)
2000
                poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2001
                                       poffset);
2002
              else
2003
                poffset = offset;
2004
            }
2005
 
2006
          if (!poffset)
2007
            {
2008
              offset_iv.base = ssize_int (0);
2009
              offset_iv.step = ssize_int (0);
2010
            }
2011
          else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2012
                               &offset_iv, false))
2013
            {
2014
              if (vect_print_dump_info (REPORT_DETAILS))
2015
                fprintf (vect_dump, "evolution of offset is not affine.\n");
2016
              return false;
2017
            }
2018
 
2019
          outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2020
          split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2021
          outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
2022
          split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2023
          outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
2024
 
2025
          outer_step = size_binop (PLUS_EXPR,
2026
                                fold_convert (ssizetype, base_iv.step),
2027
                                fold_convert (ssizetype, offset_iv.step));
2028
 
2029
          STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2030
          /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2031
          STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2032
          STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2033
          STMT_VINFO_DR_OFFSET (stmt_info) =
2034
                                fold_convert (ssizetype, offset_iv.base);
2035
          STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2036
                                size_int (highest_pow2_factor (offset_iv.base));
2037
 
2038
          if (vect_print_dump_info (REPORT_DETAILS))
2039
            {
2040
              fprintf (vect_dump, "\touter base_address: ");
2041
              print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2042
              fprintf (vect_dump, "\n\touter offset from base address: ");
2043
              print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2044
              fprintf (vect_dump, "\n\touter constant offset from base address: ");
2045
              print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2046
              fprintf (vect_dump, "\n\touter step: ");
2047
              print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2048
              fprintf (vect_dump, "\n\touter aligned to: ");
2049
              print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2050
            }
2051
        }
2052
 
2053
      if (STMT_VINFO_DATA_REF (stmt_info))
2054
        {
2055
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2056
            {
2057
              fprintf (vect_dump,
2058
                       "not vectorized: more than one data ref in stmt: ");
2059
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2060
            }
2061
          return false;
2062
        }
2063
 
2064
      STMT_VINFO_DATA_REF (stmt_info) = dr;
2065
 
2066
      /* Set vectype for STMT.  */
2067
      scalar_type = TREE_TYPE (DR_REF (dr));
2068
      STMT_VINFO_VECTYPE (stmt_info) =
2069
                get_vectype_for_scalar_type (scalar_type);
2070
      if (!STMT_VINFO_VECTYPE (stmt_info))
2071
        {
2072
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2073
            {
2074
              fprintf (vect_dump,
2075
                       "not vectorized: no vectype for stmt: ");
2076
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2077
              fprintf (vect_dump, " scalar_type: ");
2078
              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2079
            }
2080
          return false;
2081
        }
2082
    }
2083
 
2084
  return true;
2085
}
2086
 
2087
 
2088
/* Function vect_get_new_vect_var.
2089
 
2090
   Returns a name for a new variable. The current naming scheme appends the
2091
   prefix "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND) to
2092
   the name of vectorizer generated variables, and appends that to NAME if
2093
   provided.  */
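/* Illustrative examples (assumed, not part of the original comment):
   vect_get_new_vect_var (vtype, vect_simple_var, "x") creates a temporary
   whose name is based on "vect_x"; with NAME == NULL only the prefix
   ("vect_", "stmp_" or "vect_p") is used as the basis for the name.  */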
2094
 
2095
tree
2096
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2097
{
2098
  const char *prefix;
2099
  tree new_vect_var;
2100
 
2101
  switch (var_kind)
2102
  {
2103
  case vect_simple_var:
2104
    prefix = "vect_";
2105
    break;
2106
  case vect_scalar_var:
2107
    prefix = "stmp_";
2108
    break;
2109
  case vect_pointer_var:
2110
    prefix = "vect_p";
2111
    break;
2112
  default:
2113
    gcc_unreachable ();
2114
  }
2115
 
2116
  if (name)
2117
    {
2118
      char* tmp = concat (prefix, name, NULL);
2119
      new_vect_var = create_tmp_var (type, tmp);
2120
      free (tmp);
2121
    }
2122
  else
2123
    new_vect_var = create_tmp_var (type, prefix);
2124
 
2125
  /* Mark vector typed variable as a gimple register variable.  */
2126
  if (TREE_CODE (type) == VECTOR_TYPE)
2127
    DECL_GIMPLE_REG_P (new_vect_var) = true;
2128
 
2129
  return new_vect_var;
2130
}
2131
 
2132
 
2133
/* Function vect_create_addr_base_for_vector_ref.
2134
 
2135
   Create an expression that computes the address of the first memory location
2136
   that will be accessed for a data reference.
2137
 
2138
   Input:
2139
   STMT: The statement containing the data reference.
2140
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2141
   OFFSET: Optional.  If supplied, it is added to the initial address.
2142
   LOOP:    Specify the loop-nest relative to which the address should be computed.
2143
            For example, when the dataref is in an inner-loop nested in an
2144
            outer-loop that is now being vectorized, LOOP can be either the
2145
            outer-loop, or the inner-loop. The first memory location accessed
2146
            by the following dataref ('in' points to short):
2147
 
2148
                for (i=0; i<N; i++)
2149
                   for (j=0; j<M; j++)
2150
                     s += in[i+j]
2151
 
2152
            is as follows:
2153
            if LOOP=i_loop:     &in             (relative to i_loop)
2154
            if LOOP=j_loop:     &in+i*2B        (relative to j_loop)
2155
 
2156
   Output:
2157
   1. Return an SSA_NAME whose value is the address of the memory location of
2158
      the first vector of the data reference.
2159
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2160
      these statement(s) which define the returned SSA_NAME.
2161
 
2162
   FORNOW: We are only handling array accesses with step 1.  */
2163
 
2164
tree
2165
vect_create_addr_base_for_vector_ref (gimple stmt,
2166
                                      gimple_seq *new_stmt_list,
2167
                                      tree offset,
2168
                                      struct loop *loop)
2169
{
2170
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2171
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2172
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2173
  tree base_name;
2174
  tree data_ref_base_var;
2175
  tree vec_stmt;
2176
  tree addr_base, addr_expr;
2177
  tree dest;
2178
  gimple_seq seq = NULL;
2179
  tree base_offset = unshare_expr (DR_OFFSET (dr));
2180
  tree init = unshare_expr (DR_INIT (dr));
2181
  tree vect_ptr_type;
2182
  tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2183
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2184
 
2185
  if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2186
    {
2187
      struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2188
 
2189
      gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2190
 
2191
      data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2192
      base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2193
      init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2194
    }
2195
 
2196
  if (loop_vinfo)
2197
    base_name = build_fold_indirect_ref (data_ref_base);
2198
  else
2199
    {
2200
      base_offset = ssize_int (0);
2201
      init = ssize_int (0);
2202
      base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2203
    }
2204
 
2205
  data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2206
  add_referenced_var (data_ref_base_var);
2207
  data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2208
                                        data_ref_base_var);
2209
  gimple_seq_add_seq (new_stmt_list, seq);
2210
 
2211
  /* Create base_offset */
2212
  base_offset = size_binop (PLUS_EXPR,
2213
                            fold_convert (sizetype, base_offset),
2214
                            fold_convert (sizetype, init));
2215
  dest = create_tmp_var (sizetype, "base_off");
2216
  add_referenced_var (dest);
2217
  base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2218
  gimple_seq_add_seq (new_stmt_list, seq);
2219
 
2220
  if (offset)
2221
    {
2222
      tree tmp = create_tmp_var (sizetype, "offset");
2223
 
2224
      add_referenced_var (tmp);
2225
      offset = fold_build2 (MULT_EXPR, sizetype,
2226
                            fold_convert (sizetype, offset), step);
2227
      base_offset = fold_build2 (PLUS_EXPR, sizetype,
2228
                                 base_offset, offset);
2229
      base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2230
      gimple_seq_add_seq (new_stmt_list, seq);
2231
    }
2232
 
2233
  /* base + base_offset */
2234
  if (loop_vinfo)
2235
    addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2236
                             data_ref_base, base_offset);
2237
  else
2238
    {
2239
      if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2240
        addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2241
      else
2242
        addr_base = build1 (ADDR_EXPR,
2243
                            build_pointer_type (TREE_TYPE (DR_REF (dr))),
2244
                            unshare_expr (DR_REF (dr)));
2245
    }
2246
 
2247
  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2248
 
2249
  vec_stmt = fold_convert (vect_ptr_type, addr_base);
2250
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2251
                                     get_name (base_name));
2252
  add_referenced_var (addr_expr);
2253
  vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2254
  gimple_seq_add_seq (new_stmt_list, seq);
2255
 
2256
  if (vect_print_dump_info (REPORT_DETAILS))
2257
    {
2258
      fprintf (vect_dump, "created ");
2259
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2260
    }
2261
 
2262
  return vec_stmt;
2263
}
2264
 
2265
 
2266
/* Function vect_create_data_ref_ptr.
2267
 
2268
   Create a new pointer to vector type (vp) that points to the first location
2269
   accessed in the loop by STMT, along with the def-use update chain to
2270
   appropriately advance the pointer through the loop iterations. Also set
2271
   aliasing information for the pointer.  This vector pointer is used by the
2272
   callers to this function to create a memory reference expression for vector
2273
   load/store access.
2274
 
2275
   Input:
2276
   1. STMT: a stmt that references memory. Expected to be of the form
2277
         GIMPLE_ASSIGN <name, data-ref> or
2278
         GIMPLE_ASSIGN <data-ref, name>.
2279
   2. AT_LOOP: the loop where the vector memref is to be created.
2280
   3. OFFSET (optional): an offset to be added to the initial address accessed
2281
        by the data-ref in STMT.
2282
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2283
        pointing to the initial address.
2284
   5. TYPE: if not NULL indicates the required type of the data-ref.
2285
 
2286
   Output:
2287
   1. Declare a new ptr to vector_type, and have it point to the base of the
2288
      data reference (the initial address accessed by the data reference).
2289
      For example, for vector of type V8HI, the following code is generated:
2290
 
2291
      v8hi *vp;
2292
      vp = (v8hi *)initial_address;
2293
 
2294
      if OFFSET is not supplied:
2295
         initial_address = &a[init];
2296
      if OFFSET is supplied:
2297
         initial_address = &a[init + OFFSET];
2298
 
2299
      Return the initial_address in INITIAL_ADDRESS.
2300
 
2301
   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
2302
      update the pointer in each iteration of the loop.
2303
 
2304
      Return the increment stmt that updates the pointer in PTR_INCR.
2305
 
2306
   3. Set INV_P to true if the access pattern of the data reference in the
2307
      vectorized loop is invariant. Set it to false otherwise.
2308
 
2309
   4. Return the pointer.  */
2310
 
2311
tree
2312
vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2313
                          tree offset, tree *initial_address, gimple *ptr_incr,
2314
                          bool only_init, bool *inv_p)
2315
{
2316
  tree base_name;
2317
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2318
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2319
  struct loop *loop = NULL;
2320
  bool nested_in_vect_loop = false;
2321
  struct loop *containing_loop = NULL;
2322
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2323
  tree vect_ptr_type;
2324
  tree vect_ptr;
2325
  tree new_temp;
2326
  gimple vec_stmt;
2327
  gimple_seq new_stmt_list = NULL;
2328
  edge pe = NULL;
2329
  basic_block new_bb;
2330
  tree vect_ptr_init;
2331
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2332
  tree vptr;
2333
  gimple_stmt_iterator incr_gsi;
2334
  bool insert_after;
2335
  tree indx_before_incr, indx_after_incr;
2336
  gimple incr;
2337
  tree step;
2338
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2339
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2340
 
2341
  if (loop_vinfo)
2342
    {
2343
      loop = LOOP_VINFO_LOOP (loop_vinfo);
2344
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2345
      containing_loop = (gimple_bb (stmt))->loop_father;
2346
      pe = loop_preheader_edge (loop);
2347
    }
2348
  else
2349
    {
2350
      gcc_assert (bb_vinfo);
2351
      only_init = true;
2352
      *ptr_incr = NULL;
2353
    }
2354
 
2355
  /* Check the step (evolution) of the load in LOOP, and record
2356
     whether it's invariant.  */
2357
  if (nested_in_vect_loop)
2358
    step = STMT_VINFO_DR_STEP (stmt_info);
2359
  else
2360
    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2361
 
2362
  if (tree_int_cst_compare (step, size_zero_node) == 0)
2363
    *inv_p = true;
2364
  else
2365
    *inv_p = false;
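  /* Illustrative case (assumed example): when outer-loop vectorizing
     for (i) for (j) ... b[j] ..., the reference b[j] has a zero step with
     respect to the outer loop, so *INV_P is set to true and the pointer
     update step used in (4) below becomes zero as well.  */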
2366
 
2367
  /* Create an expression for the first address accessed by this load
2368
     in LOOP.  */
2369
  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2370
 
2371
  if (vect_print_dump_info (REPORT_DETAILS))
2372
    {
2373
      tree data_ref_base = base_name;
2374
      fprintf (vect_dump, "create vector-pointer variable to type: ");
2375
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
2376
      if (TREE_CODE (data_ref_base) == VAR_DECL
2377
          || TREE_CODE (data_ref_base) == ARRAY_REF)
2378
        fprintf (vect_dump, "  vectorizing an array ref: ");
2379
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2380
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
2381
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
2382
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
2383
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
2384
    }
2385
 
2386
  /** (1) Create the new vector-pointer variable:  **/
2387
  vect_ptr_type = build_pointer_type (vectype);
2388
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2389
                                    get_name (base_name));
2390
 
2391
  /* Vector types inherit the alias set of their component type by default so
2392
     we need to use a ref-all pointer if the data reference does not conflict
2393
     with the created vector data reference because it is not addressable.  */
2394
  if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2395
                              get_alias_set (DR_REF (dr))))
2396
    {
2397
      vect_ptr_type
2398
        = build_pointer_type_for_mode (vectype,
2399
                                       TYPE_MODE (vect_ptr_type), true);
2400
      vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2401
                                        get_name (base_name));
2402
    }
2403
 
2404
  /* Likewise for any of the data references in the stmt group.  */
2405
  else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2406
    {
2407
      gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2408
      do
2409
        {
2410
          tree lhs = gimple_assign_lhs (orig_stmt);
2411
          if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2412
                                      get_alias_set (lhs)))
2413
            {
2414
              vect_ptr_type
2415
                = build_pointer_type_for_mode (vectype,
2416
                                               TYPE_MODE (vect_ptr_type), true);
2417
              vect_ptr
2418
                = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2419
                                         get_name (base_name));
2420
              break;
2421
            }
2422
 
2423
          orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2424
        }
2425
      while (orig_stmt);
2426
    }
2427
 
2428
  add_referenced_var (vect_ptr);
2429
 
2430
  /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2431
      vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2432
      def-use update cycles for the pointer: One relative to the outer-loop
2433
      (LOOP), which is what steps (3) and (4) below do. The other is relative
2434
      to the inner-loop (which is the inner-most loop containing the dataref),
2435
      and this is done by step (5) below.
2436
 
2437
      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2438
      inner-most loop, and so steps (3),(4) work the same, and step (5) is
2439
      redundant.  Steps (3),(4) create the following:
2440
 
2441
        vp0 = &base_addr;
2442
        LOOP:   vp1 = phi(vp0,vp2)
2443
                ...
2444
                ...
2445
                vp2 = vp1 + step
2446
                goto LOOP
2447
 
2448
      If there is an inner-loop nested in loop, then step (5) will also be
2449
      applied, and an additional update in the inner-loop will be created:
2450
 
2451
        vp0 = &base_addr;
2452
        LOOP:   vp1 = phi(vp0,vp2)
2453
                ...
2454
        inner:     vp3 = phi(vp1,vp4)
2455
                   vp4 = vp3 + inner_step
2456
                   if () goto inner
2457
                ...
2458
                vp2 = vp1 + step
2459
                if () goto LOOP   */
2460
 
2461
  /** (3) Calculate the initial address of the vector-pointer, and set
2462
          the vector-pointer to point to it before the loop:  **/
2463
 
2464
  /* Create: &(base[init_val+offset]) in the loop preheader.  */
2465
 
2466
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2467
                                                   offset, loop);
2468
  if (new_stmt_list)
2469
    {
2470
      if (pe)
2471
        {
2472
          new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2473
          gcc_assert (!new_bb);
2474
        }
2475
      else
2476
        gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
2477
    }
2478
 
2479
  *initial_address = new_temp;
2480
 
2481
  /* Create: p = (vectype *) initial_base  */
2482
  vec_stmt = gimple_build_assign (vect_ptr,
2483
                                  fold_convert (vect_ptr_type, new_temp));
2484
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2485
  gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2486
  if (pe)
2487
    {
2488
      new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2489
      gcc_assert (!new_bb);
2490
    }
2491
  else
2492
    gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
2493
 
2494
  /** (4) Handle the updating of the vector-pointer inside the loop.
2495
          This is needed when ONLY_INIT is false, and also when AT_LOOP
2496
          is the inner-loop nested in LOOP (during outer-loop vectorization).
2497
   **/
2498
 
2499
  /* No update in loop is required.  */
2500
  if (only_init && (!loop_vinfo || at_loop == loop))
2501
    {
2502
      /* Copy the points-to information if it exists. */
2503
      if (DR_PTR_INFO (dr))
2504
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2505
      vptr = vect_ptr_init;
2506
    }
2507
  else
2508
    {
2509
      /* The step of the vector pointer is the Vector Size.  */
2510
      tree step = TYPE_SIZE_UNIT (vectype);
2511
      /* One exception to the above is when the scalar step of the load in
2512
         LOOP is zero. In this case the step here is also zero.  */
2513
      if (*inv_p)
2514
        step = size_zero_node;
2515
 
2516
      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2517
 
2518
      create_iv (vect_ptr_init,
2519
                 fold_convert (vect_ptr_type, step),
2520
                 vect_ptr, loop, &incr_gsi, insert_after,
2521
                 &indx_before_incr, &indx_after_incr);
2522
      incr = gsi_stmt (incr_gsi);
2523
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2524
 
2525
      /* Copy the points-to information if it exists. */
2526
      if (DR_PTR_INFO (dr))
2527
        {
2528
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2529
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2530
        }
2531
      if (ptr_incr)
2532
        *ptr_incr = incr;
2533
 
2534
      vptr = indx_before_incr;
2535
    }
2536
 
2537
  if (!nested_in_vect_loop || only_init)
2538
    return vptr;
2539
 
2540
 
2541
  /** (5) Handle the updating of the vector-pointer inside the inner-loop
2542
          nested in LOOP, if it exists:  **/
2543
 
2544
  gcc_assert (nested_in_vect_loop);
2545
  if (!only_init)
2546
    {
2547
      standard_iv_increment_position (containing_loop, &incr_gsi,
2548
                                      &insert_after);
2549
      create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2550
                 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2551
                 &indx_after_incr);
2552
      incr = gsi_stmt (incr_gsi);
2553
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2554
 
2555
      /* Copy the points-to information if it exists. */
2556
      if (DR_PTR_INFO (dr))
2557
        {
2558
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2559
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2560
        }
2561
      if (ptr_incr)
2562
        *ptr_incr = incr;
2563
 
2564
      return indx_before_incr;
2565
    }
2566
  else
2567
    gcc_unreachable ();
2568
}
2569
 
2570
 
2571
/* Function bump_vector_ptr
2572
 
2573
   Increment a pointer (to a vector type) by vector-size. If requested,
2574
   i.e. if PTR_INCR is given, then also connect the new increment stmt
2575
   to the existing def-use update-chain of the pointer, by modifying
2576
   the PTR_INCR as illustrated below:
2577
 
2578
   The pointer def-use update-chain before this function:
2579
                        DATAREF_PTR = phi (p_0, p_2)
2580
                        ....
2581
        PTR_INCR:       p_2 = DATAREF_PTR + step
2582
 
2583
   The pointer def-use update-chain after this function:
2584
                        DATAREF_PTR = phi (p_0, p_2)
2585
                        ....
2586
                        NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2587
                        ....
2588
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
2589
 
2590
   Input:
2591
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2592
                 in the loop.
2593
   PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2594
              the loop.  The increment amount across iterations is expected
2595
              to be vector_size.
2596
   BSI - location where the new update stmt is to be placed.
2597
   STMT - the original scalar memory-access stmt that is being vectorized.
2598
   BUMP - optional. The offset by which to bump the pointer. If not given,
2599
          the offset is assumed to be vector_size.
2600
 
2601
   Output: Return NEW_DATAREF_PTR as illustrated above.
2602
 
2603
*/
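/* Typical use (illustrative, assumed): callers that vectorize a strided
   group pass the pointer returned by vect_create_data_ref_ptr together with
   the PTR_INCR it produced, and call bump_vector_ptr between the vector
   accesses of the group so that the cross-iteration increment keeps using
   the most recently bumped pointer.  */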
2604
 
2605
tree
2606
bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2607
                 gimple stmt, tree bump)
2608
{
2609
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2610
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2611
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2612
  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2613
  tree update = TYPE_SIZE_UNIT (vectype);
2614
  gimple incr_stmt;
2615
  ssa_op_iter iter;
2616
  use_operand_p use_p;
2617
  tree new_dataref_ptr;
2618
 
2619
  if (bump)
2620
    update = bump;
2621
 
2622
  incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2623
                                            dataref_ptr, update);
2624
  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2625
  gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2626
  vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2627
 
2628
  /* Copy the points-to information if it exists. */
2629
  if (DR_PTR_INFO (dr))
2630
    duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2631
 
2632
  if (!ptr_incr)
2633
    return new_dataref_ptr;
2634
 
2635
  /* Update the vector-pointer's cross-iteration increment.  */
2636
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2637
    {
2638
      tree use = USE_FROM_PTR (use_p);
2639
 
2640
      if (use == dataref_ptr)
2641
        SET_USE (use_p, new_dataref_ptr);
2642
      else
2643
        gcc_assert (tree_int_cst_compare (use, update) == 0);
2644
    }
2645
 
2646
  return new_dataref_ptr;
2647
}
2648
 
2649
 
2650
/* Function vect_create_destination_var.
2651
 
2652
   Create a new temporary of type VECTYPE.  */
2653
 
2654
tree
2655
vect_create_destination_var (tree scalar_dest, tree vectype)
2656
{
2657
  tree vec_dest;
2658
  const char *new_name;
2659
  tree type;
2660
  enum vect_var_kind kind;
2661
 
2662
  kind = vectype ? vect_simple_var : vect_scalar_var;
2663
  type = vectype ? vectype : TREE_TYPE (scalar_dest);
2664
 
2665
  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2666
 
2667
  new_name = get_name (scalar_dest);
2668
  if (!new_name)
2669
    new_name = "var_";
2670
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
2671
  add_referenced_var (vec_dest);
2672
 
2673
  return vec_dest;
2674
}
2675
 
2676
/* Function vect_strided_store_supported.
2677
 
2678
   Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2679
   and FALSE otherwise.  */
2680
 
2681
bool
2682
vect_strided_store_supported (tree vectype)
2683
{
2684
  optab interleave_high_optab, interleave_low_optab;
2685
  int mode;
2686
 
2687
  mode = (int) TYPE_MODE (vectype);
2688
 
2689
  /* Check that the operation is supported.  */
2690
  interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2691
                                               vectype, optab_default);
2692
  interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2693
                                              vectype, optab_default);
2694
  if (!interleave_high_optab || !interleave_low_optab)
2695
    {
2696
      if (vect_print_dump_info (REPORT_DETAILS))
2697
        fprintf (vect_dump, "no optab for interleave.");
2698
      return false;
2699
    }
2700
 
2701
  if (optab_handler (interleave_high_optab, mode)->insn_code
2702
      == CODE_FOR_nothing
2703
      || optab_handler (interleave_low_optab, mode)->insn_code
2704
      == CODE_FOR_nothing)
2705
    {
2706
      if (vect_print_dump_info (REPORT_DETAILS))
2707
        fprintf (vect_dump, "interleave op not supported by target.");
2708
      return false;
2709
    }
2710
 
2711
  return true;
2712
}
2713
 
2714
 
2715
/* Function vect_permute_store_chain.
2716
 
2717
   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2718
   a power of 2, generate interleave_high/low stmts to reorder the data
2719
   correctly for the stores. Return the final references for stores in
2720
   RESULT_CHAIN.
2721
 
2722
   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2723
   The input is 4 vectors each containing 8 elements. We assign a number to each
2724
   element, the input sequence is:
2725
 
2726
   1st vec:   0  1  2  3  4  5  6  7
2727
   2nd vec:   8  9 10 11 12 13 14 15
2728
   3rd vec:  16 17 18 19 20 21 22 23
2729
   4th vec:  24 25 26 27 28 29 30 31
2730
 
2731
   The output sequence should be:
2732
 
2733
   1st vec:  0  8 16 24  1  9 17 25
2734
   2nd vec:  2 10 18 26  3 11 19 27
2735
   3rd vec:  4 12 20 28  5 13 21 29
2736
   4th vec:  6 14 22 30  7 15 23 31
2737
 
2738
   i.e., we interleave the contents of the four vectors in their order.
2739
 
2740
   We use interleave_high/low instructions to create such output. The input of
2741
   each interleave_high/low operation is two vectors:
2742
   1st vec    2nd vec
   0 1 2 3    4 5 6 7
2743
 
2744
   the even elements of the result vector are obtained left-to-right from the
2745
   high/low elements of the first vector. The odd elements of the result are
2746
   obtained left-to-right from the high/low elements of the second vector.
2747
   The output of interleave_high will be:   0 4 1 5
2748
   and of interleave_low:                   2 6 3 7
2749
 
2750
 
2751
   The permutation is done in log LENGTH stages. In each stage interleave_high
2752
   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2753
   where the first argument is taken from the first half of DR_CHAIN and the
2754
   second argument from its second half.
2755
   In our example,
2756
 
2757
   I1: interleave_high (1st vec, 3rd vec)
2758
   I2: interleave_low (1st vec, 3rd vec)
2759
   I3: interleave_high (2nd vec, 4th vec)
2760
   I4: interleave_low (2nd vec, 4th vec)
2761
 
2762
   The output for the first stage is:
2763
 
2764
   I1:  0 16  1 17  2 18  3 19
2765
   I2:  4 20  5 21  6 22  7 23
2766
   I3:  8 24  9 25 10 26 11 27
2767
   I4: 12 28 13 29 14 30 15 31
2768
 
2769
   The output of the second stage, i.e. the final result is:
2770
 
2771
   I1:  0  8 16 24  1  9 17 25
2772
   I2:  2 10 18 26  3 11 19 27
2773
   I3:  4 12 20 28  5 13 21 29
2774
   I4:  6 14 22 30  7 15 23 31.  */
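/* A rough reading of the loop below (illustrative, not from the original
   comment): exact_log2 (LENGTH) passes are made; in each pass, vector j
   from the first half of DR_CHAIN is interleaved with vector j + LENGTH/2
   from the second half, and the two results are stored into slots 2*j and
   2*j + 1 of RESULT_CHAIN, which then serves as the DR_CHAIN of the next
   pass.  */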
2775
 
2776
bool
2777
vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2778
                          unsigned int length,
2779
                          gimple stmt,
2780
                          gimple_stmt_iterator *gsi,
2781
                          VEC(tree,heap) **result_chain)
2782
{
2783
  tree perm_dest, vect1, vect2, high, low;
2784
  gimple perm_stmt;
2785
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2786
  int i;
2787
  unsigned int j;
2788
  enum tree_code high_code, low_code;
2789
 
2790
  /* Check that the operation is supported.  */
2791
  if (!vect_strided_store_supported (vectype))
2792
    return false;
2793
 
2794
  *result_chain = VEC_copy (tree, heap, dr_chain);
2795
 
2796
  for (i = 0; i < exact_log2 (length); i++)
2797
    {
2798
      for (j = 0; j < length/2; j++)
2799
        {
2800
          vect1 = VEC_index (tree, dr_chain, j);
2801
          vect2 = VEC_index (tree, dr_chain, j+length/2);
2802
 
2803
          /* Create interleaving stmt:
2804
             in the case of big endian:
2805
                                high = interleave_high (vect1, vect2)
2806
             and in the case of little endian:
2807
                                high = interleave_low (vect1, vect2).  */
2808
          perm_dest = create_tmp_var (vectype, "vect_inter_high");
2809
          DECL_GIMPLE_REG_P (perm_dest) = 1;
2810
          add_referenced_var (perm_dest);
2811
          if (BYTES_BIG_ENDIAN)
2812
            {
2813
              high_code = VEC_INTERLEAVE_HIGH_EXPR;
2814
              low_code = VEC_INTERLEAVE_LOW_EXPR;
2815
            }
2816
          else
2817
            {
2818
              low_code = VEC_INTERLEAVE_HIGH_EXPR;
2819
              high_code = VEC_INTERLEAVE_LOW_EXPR;
2820
            }
2821
          perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2822
                                                    vect1, vect2);
2823
          high = make_ssa_name (perm_dest, perm_stmt);
2824
          gimple_assign_set_lhs (perm_stmt, high);
2825
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2826
          VEC_replace (tree, *result_chain, 2*j, high);
2827
 
2828
          /* Create interleaving stmt:
2829
             in the case of big endian:
2830
                               low  = interleave_low (vect1, vect2)
2831
             and in the case of little endian:
2832
                               low  = interleave_high (vect1, vect2).  */
2833
          perm_dest = create_tmp_var (vectype, "vect_inter_low");
2834
          DECL_GIMPLE_REG_P (perm_dest) = 1;
2835
          add_referenced_var (perm_dest);
2836
          perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2837
                                                    vect1, vect2);
2838
          low = make_ssa_name (perm_dest, perm_stmt);
2839
          gimple_assign_set_lhs (perm_stmt, low);
2840
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2841
          VEC_replace (tree, *result_chain, 2*j+1, low);
2842
        }
2843
      dr_chain = VEC_copy (tree, heap, *result_chain);
2844
    }
2845
  return true;
2846
}
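/* A minimal standalone model of the interleaving permutation documented
   above (a sketch, not part of this file): "high" interleaves the first
   halves of its two inputs and "low" the second halves, and the chain is
   permuted in log2 (LENGTH) stages.  With LENGTH == 4 and 8 elements per
   vector it reproduces the final ordering shown in the example.  */

#include <stdio.h>
#include <string.h>

#define NVEC 4
#define NELT 8

static void
interleave (const int *a, const int *b, int from_low, int *out)
{
  int base = from_low ? NELT / 2 : 0;
  int k;

  for (k = 0; k < NELT / 2; k++)
    {
      out[2 * k] = a[base + k];
      out[2 * k + 1] = b[base + k];
    }
}

int
main (void)
{
  int chain[NVEC][NELT], next[NVEC][NELT];
  int v, e, stage, j;

  for (v = 0; v < NVEC; v++)
    for (e = 0; e < NELT; e++)
      chain[v][e] = v * NELT + e;       /* elements 0..31, as in the example */

  for (stage = 0; stage < 2; stage++)   /* log2 (NVEC) stages */
    {
      for (j = 0; j < NVEC / 2; j++)
        {
          /* The high result goes to slot 2*j and the low result to slot
             2*j + 1, mirroring the VEC_replace calls above.  */
          interleave (chain[j], chain[j + NVEC / 2], 0, next[2 * j]);
          interleave (chain[j], chain[j + NVEC / 2], 1, next[2 * j + 1]);
        }
      memcpy (chain, next, sizeof (chain));
    }

  for (v = 0; v < NVEC; v++)
    {
      for (e = 0; e < NELT; e++)
        printf (" %2d", chain[v][e]);
      printf ("\n");
    }
  return 0;
}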
2847
 
2848
/* Function vect_setup_realignment
2849
 
2850
   This function is called when vectorizing an unaligned load using
2851
   the dr_explicit_realign[_optimized] scheme.
2852
   This function generates the following code at the loop prolog:
2853
 
2854
      p = initial_addr;
2855
   x  msq_init = *(floor(p));   # prolog load
2856
      realignment_token = call target_builtin;
2857
    loop:
2858
   x  msq = phi (msq_init, ---)
2859
 
2860
   The stmts marked with x are generated only for the case of
2861
   dr_explicit_realign_optimized.
2862
 
2863
   The code above sets up a new (vector) pointer, pointing to the first
2864
   location accessed by STMT, and a "floor-aligned" load using that pointer.
2865
   It also generates code to compute the "realignment-token" (if the relevant
2866
   target hook was defined), and creates a phi-node at the loop-header bb
2867
   whose arguments are the result of the prolog-load (created by this
2868
   function) and the result of a load that takes place in the loop (to be
2869
   created by the caller to this function).
2870
 
2871
   For the case of dr_explicit_realign_optimized:
2872
   The caller to this function uses the phi-result (msq) to create the
2873
   realignment code inside the loop, and sets up the missing phi argument,
2874
   as follows:
2875
    loop:
2876
      msq = phi (msq_init, lsq)
2877
      lsq = *(floor(p'));        # load in loop
2878
      result = realign_load (msq, lsq, realignment_token);
2879
 
2880
   For the case of dr_explicit_realign:
2881
    loop:
2882
      msq = *(floor(p));        # load in loop
2883
      p' = p + (VS-1);
2884
      lsq = *(floor(p'));       # load in loop
2885
      result = realign_load (msq, lsq, realignment_token);
2886
 
2887
   Input:
2888
   STMT - (scalar) load stmt to be vectorized. This load accesses
2889
          a memory location that may be unaligned.
2890
   GSI - place where new code is to be inserted.
2891
   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2892
                              is used.
2893
 
2894
   Output:
2895
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2896
                       target hook, if defined.
2897
   Return value - the result of the loop-header phi node.  */
2898
 
2899
tree
2900
vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2901
                        tree *realignment_token,
2902
                        enum dr_alignment_support alignment_support_scheme,
2903
                        tree init_addr,
2904
                        struct loop **at_loop)
2905
{
2906
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2907
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2908
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2909
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2910
  edge pe;
2911
  tree scalar_dest = gimple_assign_lhs (stmt);
2912
  tree vec_dest;
2913
  gimple inc;
2914
  tree ptr;
2915
  tree data_ref;
2916
  gimple new_stmt;
2917
  basic_block new_bb;
2918
  tree msq_init = NULL_TREE;
2919
  tree new_temp;
2920
  gimple phi_stmt;
2921
  tree msq = NULL_TREE;
2922
  gimple_seq stmts = NULL;
2923
  bool inv_p;
2924
  bool compute_in_loop = false;
2925
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2926
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2927
  struct loop *loop_for_initial_load;
2928
 
2929
  gcc_assert (alignment_support_scheme == dr_explicit_realign
2930
              || alignment_support_scheme == dr_explicit_realign_optimized);
2931
 
2932
  /* We need to generate three things:
2933
     1. the misalignment computation
2934
     2. the extra vector load (for the optimized realignment scheme).
2935
     3. the phi node for the two vectors from which the realignment is
2936
      done (for the optimized realignment scheme).
2937
   */
2938
 
2939
  /* 1. Determine where to generate the misalignment computation.
2940
 
2941
     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2942
     calculation will be generated by this function, outside the loop (in the
2943
     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
2944
     caller, inside the loop.
2945
 
2946
     Background: If the misalignment remains fixed throughout the iterations of
2947
     the loop, then both realignment schemes are applicable, and also the
2948
     misalignment computation can be done outside LOOP.  This is because we are
2949
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2950
     are a multiple of VS (the Vector Size), and therefore the misalignment in
2951
     different vectorized LOOP iterations is always the same.
2952
     The problem arises only if the memory access is in an inner-loop nested
2953
     inside LOOP, which is now being vectorized using outer-loop vectorization.
2954
     This is the only case when the misalignment of the memory access may not
2955
     remain fixed throughout the iterations of the inner-loop (as explained in
2956
     detail in vect_supportable_dr_alignment).  In this case, not only is the
2957
     optimized realignment scheme not applicable, but also the misalignment
2958
     computation (and generation of the realignment token that is passed to
2959
     REALIGN_LOAD) have to be done inside the loop.
2960
 
2961
     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2962
     or not, which in turn determines if the misalignment is computed inside
2963
     the inner-loop, or outside LOOP.  */
2964
 
2965
  if (init_addr != NULL_TREE)
2966
    {
2967
      compute_in_loop = true;
2968
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
2969
    }
2970
 
2971
 
2972
  /* 2. Determine where to generate the extra vector load.
2973
 
2974
     For the optimized realignment scheme, instead of generating two vector
2975
     loads in each iteration, we generate a single extra vector load in the
2976
     preheader of the loop, and in each iteration reuse the result of the
2977
     vector load from the previous iteration.  In case the memory access is in
2978
     an inner-loop nested inside LOOP, which is now being vectorized using
2979
     outer-loop vectorization, we need to determine whether this initial vector
2980
     load should be generated at the preheader of the inner-loop, or can be
2981
     generated at the preheader of LOOP.  If the memory access has no evolution
2982
     in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2983
     to be generated inside LOOP (in the preheader of the inner-loop).  */
2984
 
2985
  if (nested_in_vect_loop)
2986
    {
2987
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2988
      bool invariant_in_outerloop =
2989
            (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2990
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2991
    }
2992
  else
2993
    loop_for_initial_load = loop;
2994
  if (at_loop)
2995
    *at_loop = loop_for_initial_load;
2996
 
2997
  /* 3. For the case of the optimized realignment, create the first vector
2998
      load at the loop preheader.  */
2999
 
3000
  if (alignment_support_scheme == dr_explicit_realign_optimized)
3001
    {
3002
      /* Create msq_init = *(floor(p1)) in the loop preheader  */
3003
 
3004
      gcc_assert (!compute_in_loop);
3005
      pe = loop_preheader_edge (loop_for_initial_load);
3006
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
3007
      ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3008
                                      &init_addr, &inc, true, &inv_p);
3009
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3010
      new_stmt = gimple_build_assign (vec_dest, data_ref);
3011
      new_temp = make_ssa_name (vec_dest, new_stmt);
3012
      gimple_assign_set_lhs (new_stmt, new_temp);
3013
      mark_symbols_for_renaming (new_stmt);
3014
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3015
      gcc_assert (!new_bb);
3016
      msq_init = gimple_assign_lhs (new_stmt);
3017
    }
3018
 
3019
  /* 4. Create realignment token using a target builtin, if available.
3020
      It is done either inside the containing loop, or before LOOP (as
3021
      determined above).  */
3022
 
3023
  if (targetm.vectorize.builtin_mask_for_load)
3024
    {
3025
      tree builtin_decl;
3026
 
3027
      /* Compute INIT_ADDR - the initial address accessed by this memref.  */
3028
      if (compute_in_loop)
3029
        gcc_assert (init_addr); /* already computed by the caller.  */
3030
      else
3031
        {
3032
          /* Generate the INIT_ADDR computation outside LOOP.  */
3033
          init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3034
                                                        NULL_TREE, loop);
3035
          pe = loop_preheader_edge (loop);
3036
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3037
          gcc_assert (!new_bb);
3038
        }
3039
 
3040
      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3041
      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3042
      vec_dest =
3043
        vect_create_destination_var (scalar_dest,
3044
                                     gimple_call_return_type (new_stmt));
3045
      new_temp = make_ssa_name (vec_dest, new_stmt);
3046
      gimple_call_set_lhs (new_stmt, new_temp);
3047
 
3048
      if (compute_in_loop)
3049
        gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3050
      else
3051
        {
3052
          /* Generate the misalignment computation outside LOOP.  */
3053
          pe = loop_preheader_edge (loop);
3054
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3055
          gcc_assert (!new_bb);
3056
        }
3057
 
3058
      *realignment_token = gimple_call_lhs (new_stmt);
3059
 
3060
      /* The result of the CALL_EXPR to this builtin is determined from
3061
         the value of the parameter and no global variables are touched,
3062
         which makes the builtin a "const" function.  Requiring the
3063
         builtin to have the "const" attribute makes it unnecessary
3064
         to call mark_call_clobbered.  */
3065
      gcc_assert (TREE_READONLY (builtin_decl));
3066
    }
3067
 
3068
  if (alignment_support_scheme == dr_explicit_realign)
3069
    return msq;
3070
 
3071
  gcc_assert (!compute_in_loop);
3072
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3073
 
3074
 
3075
  /* 5. Create msq = phi <msq_init, lsq> in loop  */
3076
 
3077
  pe = loop_preheader_edge (containing_loop);
3078
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
3079
  msq = make_ssa_name (vec_dest, NULL);
3080
  phi_stmt = create_phi_node (msq, containing_loop->header);
3081
  SSA_NAME_DEF_STMT (msq) = phi_stmt;
3082
  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3083
 
3084
  return msq;
3085
}
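/* A rough, target-neutral model of what the REALIGN_LOAD described above
   computes (a sketch, not GCC code): given the two floor-aligned loads MSQ
   and LSQ that straddle the unaligned address, and a realignment token here
   taken to be simply the byte misalignment, select VS consecutive bytes
   starting at that offset within the concatenation MSQ|LSQ.  Real targets
   encode the token differently, e.g. as a byte-permute mask returned by
   builtin_mask_for_load.  */

#include <stdint.h>
#include <stdio.h>

#define VS 16   /* vector size in bytes; an assumption for this sketch */

static void
realign_load_model (const uint8_t *msq, const uint8_t *lsq,
                    unsigned int token, uint8_t *result)
{
  unsigned int i;

  for (i = 0; i < VS; i++)
    result[i] = (token + i < VS) ? msq[token + i] : lsq[token + i - VS];
}

int
main (void)
{
  uint8_t mem[2 * VS], out[VS];
  unsigned int misalign = 5;    /* byte offset of the unaligned access */
  unsigned int i;

  for (i = 0; i < 2 * VS; i++)
    mem[i] = (uint8_t) i;

  /* msq = *(floor (p)), lsq = *(floor (p) + VS), token = misalignment.  */
  realign_load_model (mem, mem + VS, misalign, out);

  for (i = 0; i < VS; i++)
    printf ("%u ", out[i]);     /* prints 5 .. 20 */
  printf ("\n");
  return 0;
}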
3086
 
3087
 
3088
/* Function vect_strided_load_supported.
3089
 
3090
   Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3091
   and FALSE otherwise.  */
3092
 
3093
bool
3094
vect_strided_load_supported (tree vectype)
3095
{
3096
  optab perm_even_optab, perm_odd_optab;
3097
  int mode;
3098
 
3099
  mode = (int) TYPE_MODE (vectype);
3100
 
3101
  perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3102
                                         optab_default);
3103
  if (!perm_even_optab)
3104
    {
3105
      if (vect_print_dump_info (REPORT_DETAILS))
3106
        fprintf (vect_dump, "no optab for perm_even.");
3107
      return false;
3108
    }
3109
 
3110
  if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3111
    {
3112
      if (vect_print_dump_info (REPORT_DETAILS))
3113
        fprintf (vect_dump, "perm_even op not supported by target.");
3114
      return false;
3115
    }
3116
 
3117
  perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3118
                                        optab_default);
3119
  if (!perm_odd_optab)
3120
    {
3121
      if (vect_print_dump_info (REPORT_DETAILS))
3122
        fprintf (vect_dump, "no optab for perm_odd.");
3123
      return false;
3124
    }
3125
 
3126
  if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3127
    {
3128
      if (vect_print_dump_info (REPORT_DETAILS))
3129
        fprintf (vect_dump, "perm_odd op not supported by target.");
3130
      return false;
3131
    }
3132
  return true;
3133
}
3134
 
3135
 
3136
/* Function vect_permute_load_chain.
3137
 
3138
   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3139
   a power of 2, generate extract_even/odd stmts to reorder the input data
3140
   correctly. Return the final references for loads in RESULT_CHAIN.
3141
 
3142
   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3143
   The input is 4 vectors each containing 8 elements. We assign a number to each
3144
   element; the input sequence is:
3145
 
3146
   1st vec:   0  1  2  3  4  5  6  7
3147
   2nd vec:   8  9 10 11 12 13 14 15
3148
   3rd vec:  16 17 18 19 20 21 22 23
3149
   4th vec:  24 25 26 27 28 29 30 31
3150
 
3151
   The output sequence should be:
3152
 
3153
   1st vec:  0 4  8 12 16 20 24 28
3154
   2nd vec:  1 5  9 13 17 21 25 29
3155
   3rd vec:  2 6 10 14 18 22 26 30
3156
   4th vec:  3 7 11 15 19 23 27 31
3157
 
3158
   i.e., the first output vector should contain the first elements of each
3159
   interleaving group, etc.
3160
 
3161
   We use extract_even/odd instructions to create such output. The input of each
3162
   extract_even/odd operation is two vectors
3163
   1st vec    2nd vec
3164
   0 1 2 3    4 5 6 7
3165
 
3166
   and the output is the vector of extracted even/odd elements. The output of
3167
   extract_even will be:   0 2 4 6
3168
   and of extract_odd:     1 3 5 7
3169
 
3170
 
3171
   The permutation is done in log LENGTH stages. In each stage extract_even and
3172
   extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3173
   order. In our example,
3174
 
3175
   E1: extract_even (1st vec, 2nd vec)
3176
   E2: extract_odd (1st vec, 2nd vec)
3177
   E3: extract_even (3rd vec, 4th vec)
3178
   E4: extract_odd (3rd vec, 4th vec)
3179
 
3180
   The output for the first stage will be:
3181
 
3182
   E1:  0  2  4  6  8 10 12 14
3183
   E2:  1  3  5  7  9 11 13 15
3184
   E3: 16 18 20 22 24 26 28 30
3185
   E4: 17 19 21 23 25 27 29 31
3186
 
3187
   In order to proceed and create the correct sequence for the next stage (or
3188
   for the correct output, if the second stage is the last one, as in our
3189
   example), we first put the output of the extract_even operation and then the
3190
   output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3191
   The input for the second stage is:
3192
 
3193
   1st vec (E1):  0  2  4  6  8 10 12 14
3194
   2nd vec (E3): 16 18 20 22 24 26 28 30
3195
   3rd vec (E2):  1  3  5  7  9 11 13 15
3196
   4th vec (E4): 17 19 21 23 25 27 29 31
3197
 
3198
   The output of the second stage:
3199
 
3200
   E1: 0 4  8 12 16 20 24 28
3201
   E2: 2 6 10 14 18 22 26 30
3202
   E3: 1 5  9 13 17 21 25 29
3203
   E4: 3 7 11 15 19 23 27 31
3204
 
3205
   And RESULT_CHAIN after reordering:
3206
 
3207
   1st vec (E1):  0 4  8 12 16 20 24 28
3208
   2nd vec (E3):  1 5  9 13 17 21 25 29
3209
   3rd vec (E2):  2 6 10 14 18 22 26 30
3210
   4th vec (E4):  3 7 11 15 19 23 27 31.  */
3211
 
3212
bool
3213
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3214
                         unsigned int length,
3215
                         gimple stmt,
3216
                         gimple_stmt_iterator *gsi,
3217
                         VEC(tree,heap) **result_chain)
3218
{
3219
  tree perm_dest, data_ref, first_vect, second_vect;
3220
  gimple perm_stmt;
3221
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3222
  int i;
3223
  unsigned int j;
3224
 
3225
  /* Check that the operation is supported.  */
3226
  if (!vect_strided_load_supported (vectype))
3227
    return false;
3228
 
3229
  *result_chain = VEC_copy (tree, heap, dr_chain);
3230
  for (i = 0; i < exact_log2 (length); i++)
3231
    {
3232
      for (j = 0; j < length; j +=2)
3233
        {
3234
          first_vect = VEC_index (tree, dr_chain, j);
3235
          second_vect = VEC_index (tree, dr_chain, j+1);
3236
 
3237
          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
3238
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
3239
          DECL_GIMPLE_REG_P (perm_dest) = 1;
3240
          add_referenced_var (perm_dest);
3241
 
3242
          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3243
                                                    perm_dest, first_vect,
3244
                                                    second_vect);
3245
 
3246
          data_ref = make_ssa_name (perm_dest, perm_stmt);
3247
          gimple_assign_set_lhs (perm_stmt, data_ref);
3248
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3249
          mark_symbols_for_renaming (perm_stmt);
3250
 
3251
          VEC_replace (tree, *result_chain, j/2, data_ref);
3252
 
3253
          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
3254
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3255
          DECL_GIMPLE_REG_P (perm_dest) = 1;
3256
          add_referenced_var (perm_dest);
3257
 
3258
          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3259
                                                    perm_dest, first_vect,
3260
                                                    second_vect);
3261
          data_ref = make_ssa_name (perm_dest, perm_stmt);
3262
          gimple_assign_set_lhs (perm_stmt, data_ref);
3263
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3264
          mark_symbols_for_renaming (perm_stmt);
3265
 
3266
          VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3267
        }
3268
      dr_chain = VEC_copy (tree, heap, *result_chain);
3269
    }
3270
  return true;
3271
}
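/* A minimal standalone model of the extract_even/odd permutation documented
   above (a sketch, not GCC code): each stage pairs adjacent vectors, stores
   the even-extracted result in slot j/2 and the odd-extracted result in slot
   j/2 + LENGTH/2, and log2 (LENGTH) stages yield the de-interleaved order
   shown in the example.  */

#include <stdio.h>
#include <string.h>

#define NVEC 4
#define NELT 8

static void
extract (const int *a, const int *b, int odd, int *out)
{
  int k;

  for (k = 0; k < NELT / 2; k++)
    {
      out[k] = a[2 * k + odd];
      out[NELT / 2 + k] = b[2 * k + odd];
    }
}

int
main (void)
{
  int chain[NVEC][NELT], next[NVEC][NELT];
  int v, e, stage, j;

  for (v = 0; v < NVEC; v++)
    for (e = 0; e < NELT; e++)
      chain[v][e] = v * NELT + e;       /* interleaved input 0..31 */

  for (stage = 0; stage < 2; stage++)   /* log2 (NVEC) stages */
    {
      for (j = 0; j < NVEC; j += 2)
        {
          extract (chain[j], chain[j + 1], 0, next[j / 2]);
          extract (chain[j], chain[j + 1], 1, next[j / 2 + NVEC / 2]);
        }
      memcpy (chain, next, sizeof (chain));
    }

  for (v = 0; v < NVEC; v++)
    {
      for (e = 0; e < NELT; e++)
        printf (" %2d", chain[v][e]);
      printf ("\n");
    }
  return 0;
}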
3272
 
3273
 
3274
/* Function vect_transform_strided_load.
3275
 
3276
   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3277
   to perform their permutation and ascribe the resulting vectorized statements to
3278
   the scalar statements.
3279
*/
3280
 
3281
bool
3282
vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3283
                             gimple_stmt_iterator *gsi)
3284
{
3285
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3286
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3287
  gimple next_stmt, new_stmt;
3288
  VEC(tree,heap) *result_chain = NULL;
3289
  unsigned int i, gap_count;
3290
  tree tmp_data_ref;
3291
 
3292
  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3293
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
3294
     vectors that are ready for vector computation.  */
3295
  result_chain = VEC_alloc (tree, heap, size);
3296
  /* Permute.  */
3297
  if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3298
    return false;
3299
 
3300
  /* Put a permuted data-ref in the VECTORIZED_STMT field.
3301
     Since we scan the chain starting from its first node, their order
3302
     corresponds to the order of data-refs in RESULT_CHAIN.  */
3303
  next_stmt = first_stmt;
3304
  gap_count = 1;
3305
  for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3306
    {
3307
      if (!next_stmt)
3308
        break;
3309
 
3310
      /* Skip the gaps. Loads created for the gaps will be removed by the dead
3311
       code elimination pass later. No need to check for the first stmt in
3312
       the group, since it always exists.
3313
       DR_GROUP_GAP is the number of steps in elements from the previous
3314
       access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3315
       correspond to the gaps.
3316
      */
3317
      if (next_stmt != first_stmt
3318
          && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3319
      {
3320
        gap_count++;
3321
        continue;
3322
      }
3323
 
3324
      while (next_stmt)
3325
        {
3326
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3327
          /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3328
             copies, and we put the new vector statement in the first available
3329
             RELATED_STMT.  */
3330
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3331
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3332
          else
3333
            {
3334
              if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3335
                {
3336
                  gimple prev_stmt =
3337
                    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3338
                  gimple rel_stmt =
3339
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3340
                  while (rel_stmt)
3341
                    {
3342
                      prev_stmt = rel_stmt;
3343
                      rel_stmt =
3344
                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3345
                    }
3346
 
3347
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3348
                    new_stmt;
3349
                }
3350
            }
3351
 
3352
          next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3353
          gap_count = 1;
3354
          /* If NEXT_STMT accesses the same DR as the previous statement,
3355
             put the same TMP_DATA_REF as its vectorized statement; otherwise
3356
             get the next data-ref from RESULT_CHAIN.  */
3357
          if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3358
            break;
3359
        }
3360
    }
3361
 
3362
  VEC_free (tree, heap, result_chain);
3363
  return true;
3364
}
3365
 
3366
/* Function vect_can_force_dr_alignment_p.
3367
 
3368
   Returns whether the alignment of DECL can be forced to be aligned
3369
   on an ALIGNMENT-bit boundary.  */
3370
 
3371
bool
3372
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3373
{
3374
  if (TREE_CODE (decl) != VAR_DECL)
3375
    return false;
3376
 
3377
  if (DECL_EXTERNAL (decl))
3378
    return false;
3379
 
3380
  if (TREE_ASM_WRITTEN (decl))
3381
    return false;
3382
 
3383
  if (TREE_STATIC (decl))
3384
    return (alignment <= MAX_OFILE_ALIGNMENT);
3385
  else
3386
    return (alignment <= MAX_STACK_ALIGNMENT);
3387
}
3388
 
3389
/* Function vect_supportable_dr_alignment
3390
 
3391
   Return whether the data reference DR is supported with respect to its
3392
   alignment.  */
3393
 
3394
enum dr_alignment_support
3395
vect_supportable_dr_alignment (struct data_reference *dr)
3396
{
3397
  gimple stmt = DR_STMT (dr);
3398
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3399
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3400
  enum machine_mode mode = TYPE_MODE (vectype);
3401
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3402
  struct loop *vect_loop = NULL;
3403
  bool nested_in_vect_loop = false;
3404
 
3405
  if (aligned_access_p (dr))
3406
    return dr_aligned;
3407
 
3408
  if (!loop_vinfo)
3409
    /* FORNOW: Misaligned accesses are supported only in loops.  */
3410
    return dr_unaligned_unsupported;
3411
 
3412
  vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3413
  nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3414
 
3415
  /* Possibly unaligned access.  */
3416
 
3417
  /* We can choose between using the implicit realignment scheme (generating
3418
     a misaligned_move stmt) and the explicit realignment scheme (generating
3419
     aligned loads with a REALIGN_LOAD). There are two variants of the explicit
3420
     realignment scheme: optimized and unoptimized.
3421
     We can optimize the realignment only if the step between consecutive
3422
     vector loads is equal to the vector size.  Since the vector memory
3423
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3424
     is guaranteed that the misalignment amount remains the same throughout the
3425
     execution of the vectorized loop.  Therefore, we can create the
3426
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3427
     at the loop preheader.
3428
 
3429
     However, in the case of outer-loop vectorization, when vectorizing a
3430
     memory access in the inner-loop nested within the LOOP that is now being
3431
     vectorized, while it is guaranteed that the misalignment of the
3432
     vectorized memory access will remain the same in different outer-loop
3433
     iterations, it is *not* guaranteed that it will remain the same throughout
3434
     the execution of the inner-loop.  This is because the inner-loop advances
3435
     with the original scalar step (and not in steps of VS).  If the inner-loop
3436
     step happens to be a multiple of VS, then the misalignment remains fixed
3437
     and we can use the optimized realignment scheme.  For example:
3438
 
3439
      for (i=0; i<N; i++)
3440
        for (j=0; j<M; j++)
3441
          s += a[i+j];
3442
 
3443
     When vectorizing the i-loop in the above example, the step between
3444
     consecutive vector loads is 1, and so the misalignment does not remain
3445
     fixed across the execution of the inner-loop, and the realignment cannot
3446
     be optimized (as illustrated in the following pseudo vectorized loop):
3447
 
3448
      for (i=0; i<N; i+=4)
3449
        for (j=0; j<M; j++){
3450
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3451
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
3452
                         // (assuming that we start from an aligned address).
3453
          }
3454
 
3455
     We therefore have to use the unoptimized realignment scheme:
3456
 
3457
      for (i=0; i<N; i+=4)
3458
          for (j=k; j<M; j+=4)
3459
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3460
                           // that the misalignment of the initial address is
3461
                           // 0).
3462
 
3463
     The loop can then be vectorized as follows:
3464
 
3465
      for (k=0; k<4; k++){
3466
        rt = get_realignment_token (&vp[k]);
3467
        for (i=0; i<N; i+=4){
3468
          v1 = vp[i+k];
3469
          for (j=k; j<M; j+=4){
3470
            v2 = vp[i+j+VS-1];
3471
            va = REALIGN_LOAD <v1,v2,rt>;
3472
            vs += va;
3473
            v1 = v2;
3474
          }
3475
        }
3476
    } */
3477
 
3478
  if (DR_IS_READ (dr))
3479
    {
3480
      bool is_packed = false;
3481
      tree type = (TREE_TYPE (DR_REF (dr)));
3482
 
3483
      if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3484
                                                             CODE_FOR_nothing
3485
          && (!targetm.vectorize.builtin_mask_for_load
3486
              || targetm.vectorize.builtin_mask_for_load ()))
3487
        {
3488
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3489
          if (nested_in_vect_loop
3490
              && (TREE_INT_CST_LOW (DR_STEP (dr))
3491
                  != GET_MODE_SIZE (TYPE_MODE (vectype))))
3492
            return dr_explicit_realign;
3493
          else
3494
            return dr_explicit_realign_optimized;
3495
        }
3496
      if (!known_alignment_for_access_p (dr))
3497
        {
3498
          tree ba = DR_BASE_OBJECT (dr);
3499
 
3500
          if (ba)
3501
            is_packed = contains_packed_reference (ba);
3502
        }
3503
 
3504
      if (targetm.vectorize.
3505
          builtin_support_vector_misalignment (mode, type,
3506
                                               DR_MISALIGNMENT (dr), is_packed))
3507
        /* Can't software pipeline the loads, but can at least do them.  */
3508
        return dr_unaligned_supported;
3509
    }
3510
  else
3511
    {
3512
      bool is_packed = false;
3513
      tree type = (TREE_TYPE (DR_REF (dr)));
3514
 
3515
      if (!known_alignment_for_access_p (dr))
3516
        {
3517
          tree ba = DR_BASE_OBJECT (dr);
3518
 
3519
          if (ba)
3520
            is_packed = contains_packed_reference (ba);
3521
        }
3522
 
3523
     if (targetm.vectorize.
3524
         builtin_support_vector_misalignment (mode, type,
3525
                                              DR_MISALIGNMENT (dr), is_packed))
3526
       return dr_unaligned_supported;
3527
    }
3528
 
3529
  /* Unsupported.  */
3530
  return dr_unaligned_unsupported;
3531
}
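/* A small standalone check of the misalignment argument in the comment
   inside vect_supportable_dr_alignment (a sketch, not GCC code): when the
   inner loop advances by the scalar step, the misalignment of &vp[i+j]
   cycles through all residues modulo VS, so only the unoptimized realignment
   scheme applies; when the step is a multiple of VS the misalignment stays
   fixed and the optimized scheme can be used.  VS == 4 is an assumption.  */

#include <stdio.h>

int
main (void)
{
  const int vs = 4;
  int j;

  printf ("scalar step (j++):   ");
  for (j = 0; j < 8; j++)
    printf ("%d ", j % vs);     /* misalignment varies: 0 1 2 3 0 1 2 3 */

  printf ("\nstep == VS (j += 4): ");
  for (j = 0; j < 8 * vs; j += vs)
    printf ("%d ", j % vs);     /* misalignment stays 0 throughout */

  printf ("\n");
  return 0;
}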
