OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [tree-vect-data-refs.c] - Blame information for rev 774

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Data References Analysis and Manipulation Utilities for Vectorization.
2
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3
   Free Software Foundation, Inc.
4
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
5
   and Ira Rosen <irar@il.ibm.com>
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify it under
10
the terms of the GNU General Public License as published by the Free
11
Software Foundation; either version 3, or (at your option) any later
12
version.
13
 
14
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "ggc.h"
28
#include "tree.h"
29
#include "tm_p.h"
30
#include "target.h"
31
#include "basic-block.h"
32
#include "tree-pretty-print.h"
33
#include "gimple-pretty-print.h"
34
#include "tree-flow.h"
35
#include "tree-dump.h"
36
#include "cfgloop.h"
37
#include "tree-chrec.h"
38
#include "tree-scalar-evolution.h"
39
#include "tree-vectorizer.h"
40
#include "diagnostic-core.h"
41
 
42
/* Need to include rtl.h, expr.h, etc. for optabs.  */
43
#include "expr.h"
44
#include "optabs.h"
45
 
46
/* Return true if load- or store-lanes optab OPTAB is implemented for
47
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
48
 
49
static bool
50
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51
                              tree vectype, unsigned HOST_WIDE_INT count)
52
{
53
  enum machine_mode mode, array_mode;
54
  bool limit_p;
55
 
56
  mode = TYPE_MODE (vectype);
57
  limit_p = !targetm.array_mode_supported_p (mode, count);
58
  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
59
                              MODE_INT, limit_p);
60
 
61
  if (array_mode == BLKmode)
62
    {
63
      if (vect_print_dump_info (REPORT_DETAILS))
64
        fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65
                 GET_MODE_NAME (mode), count);
66
      return false;
67
    }
68
 
69
  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
70
    {
71
      if (vect_print_dump_info (REPORT_DETAILS))
72
        fprintf (vect_dump, "cannot use %s<%s><%s>",
73
                 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
74
      return false;
75
    }
76
 
77
  if (vect_print_dump_info (REPORT_DETAILS))
78
    fprintf (vect_dump, "can use %s<%s><%s>",
79
             name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
80
 
81
  return true;
82
}
83
 
84
 
85
/* Return the smallest scalar part of STMT.
86
   This is used to determine the vectype of the stmt.  We generally set the
87
   vectype according to the type of the result (lhs).  For stmts whose
88
   result-type is different than the type of the arguments (e.g., demotion,
89
   promotion), vectype will be reset appropriately (later).  Note that we have
90
   to visit the smallest datatype in this function, because that determines the
91
   VF.  If the smallest datatype in the loop is present only as the rhs of a
92
   promotion operation - we'd miss it.
93
   Such a case, where a variable of this datatype does not appear in the lhs
94
   anywhere in the loop, can only occur if it's an invariant: e.g.:
95
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
96
   invariant motion.  However, we cannot rely on invariant motion to always
97
   take invariants out of the loop, and so in the case of promotion we also
98
   have to check the rhs.
99
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
100
   types.  */
101
 
102
tree
103
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
104
                               HOST_WIDE_INT *rhs_size_unit)
105
{
106
  tree scalar_type = gimple_expr_type (stmt);
107
  HOST_WIDE_INT lhs, rhs;
108
 
109
  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
110
 
111
  if (is_gimple_assign (stmt)
112
      && (gimple_assign_cast_p (stmt)
113
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
114
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
115
    {
116
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
117
 
118
      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119
      if (rhs < lhs)
120
        scalar_type = rhs_type;
121
    }
122
 
123
  *lhs_size_unit = lhs;
124
  *rhs_size_unit = rhs;
125
  return scalar_type;
126
}
127
 
128
 
129
/* Find the place of the data-ref in STMT in the interleaving chain that starts
130
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
131
 
132
int
133
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
134
{
135
  gimple next_stmt = first_stmt;
136
  int result = 0;
137
 
138
  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139
    return -1;
140
 
141
  while (next_stmt && next_stmt != stmt)
142
    {
143
      result++;
144
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
145
    }
146
 
147
  if (next_stmt)
148
    return result;
149
  else
150
    return -1;
151
}
152
 
153
 
154
/* Function vect_insert_into_interleaving_chain.
155
 
156
   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
157
 
158
static void
159
vect_insert_into_interleaving_chain (struct data_reference *dra,
160
                                     struct data_reference *drb)
161
{
162
  gimple prev, next;
163
  tree next_init;
164
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
166
 
167
  prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168
  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169
  while (next)
170
    {
171
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
173
        {
174
          /* Insert here.  */
175
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176
          GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177
          return;
178
        }
179
      prev = next;
180
      next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
181
    }
182
 
183
  /* We got to the end of the list. Insert here.  */
184
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185
  GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
186
}
187
 
188
 
189
/* Function vect_update_interleaving_chain.
190
 
191
   For two data-refs DRA and DRB that are a part of a chain interleaved data
192
   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
193
 
194
   There are four possible cases:
195
   1. New stmts - both DRA and DRB are not a part of any chain:
196
      FIRST_DR = DRB
197
      NEXT_DR (DRB) = DRA
198
   2. DRB is a part of a chain and DRA is not:
199
      no need to update FIRST_DR
200
      no need to insert DRB
201
      insert DRA according to init
202
   3. DRA is a part of a chain and DRB is not:
203
      if (init of FIRST_DR > init of DRB)
204
          FIRST_DR = DRB
205
          NEXT(FIRST_DR) = previous FIRST_DR
206
      else
207
          insert DRB according to its init
208
   4. both DRA and DRB are in some interleaving chains:
209
      choose the chain with the smallest init of FIRST_DR
210
      insert the nodes of the second chain into the first one.  */
211
 
212
static void
213
vect_update_interleaving_chain (struct data_reference *drb,
214
                                struct data_reference *dra)
215
{
216
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218
  tree next_init, init_dra_chain, init_drb_chain;
219
  gimple first_a, first_b;
220
  tree node_init;
221
  gimple node, prev, next, first_stmt;
222
 
223
  /* 1. New stmts - both DRA and DRB are not a part of any chain.   */
224
  if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
225
    {
226
      GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227
      GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228
      GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229
      return;
230
    }
231
 
232
  /* 2. DRB is a part of a chain and DRA is not.  */
233
  if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
234
    {
235
      GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236
      /* Insert DRA into the chain of DRB.  */
237
      vect_insert_into_interleaving_chain (dra, drb);
238
      return;
239
    }
240
 
241
  /* 3. DRA is a part of a chain and DRB is not.  */
242
  if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
243
    {
244
      gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246
                                                              old_first_stmt)));
247
      gimple tmp;
248
 
249
      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
250
        {
251
          /* DRB's init is smaller than the init of the stmt previously marked
252
             as the first stmt of the interleaving chain of DRA.  Therefore, we
253
             update FIRST_STMT and put DRB in the head of the list.  */
254
          GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255
          GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
256
 
257
          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
258
          tmp = old_first_stmt;
259
          while (tmp)
260
            {
261
              GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262
              tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
263
            }
264
        }
265
      else
266
        {
267
          /* Insert DRB in the list of DRA.  */
268
          vect_insert_into_interleaving_chain (drb, dra);
269
          GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
270
        }
271
      return;
272
    }
273
 
274
  /* 4. both DRA and DRB are in some interleaving chains.  */
275
  first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276
  first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277
  if (first_a == first_b)
278
    return;
279
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
281
 
282
  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
283
    {
284
      /* Insert the nodes of DRA chain into the DRB chain.
285
         After inserting a node, continue from this node of the DRB chain (don't
286
         start from the beginning.  */
287
      node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288
      prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289
      first_stmt = first_b;
290
    }
291
  else
292
    {
293
      /* Insert the nodes of DRB chain into the DRA chain.
294
         After inserting a node, continue from this node of the DRA chain (don't
295
         start from the beginning.  */
296
      node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297
      prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298
      first_stmt = first_a;
299
    }
300
 
301
  while (node)
302
    {
303
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304
      next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305
      while (next)
306
        {
307
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308
          if (tree_int_cst_compare (next_init, node_init) > 0)
309
            {
310
              /* Insert here.  */
311
              GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312
              GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313
              prev = node;
314
              break;
315
            }
316
          prev = next;
317
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
318
        }
319
      if (!next)
320
        {
321
          /* We got to the end of the list. Insert here.  */
322
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324
          prev = node;
325
        }
326
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327
      node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
328
    }
329
}
330
 
331
/* Check dependence between DRA and DRB for basic block vectorization.
332
   If the accesses share same bases and offsets, we can compare their initial
333
   constant offsets to decide whether they differ or not.  In case of a read-
334
   write dependence we check that the load is before the store to ensure that
335
   vectorization will not change the order of the accesses.  */
336
 
337
static bool
338
vect_drs_dependent_in_basic_block (struct data_reference *dra,
339
                                   struct data_reference *drb)
340
{
341
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342
  gimple earlier_stmt;
343
 
344
  /* We only call this function for pairs of loads and stores, but we verify
345
     it here.  */
346
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
347
    {
348
      if (DR_IS_READ (dra))
349
        return false;
350
      else
351
        return true;
352
    }
353
 
354
  /* Check that the data-refs have same bases and offsets.  If not, we can't
355
     determine if they are dependent.  */
356
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357
      || !dr_equal_offsets_p (dra, drb))
358
    return true;
359
 
360
  /* Check the types.  */
361
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
363
 
364
  if (type_size_a != type_size_b
365
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366
                              TREE_TYPE (DR_REF (drb))))
367
    return true;
368
 
369
  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
371
 
372
  /* Two different locations - no dependence.  */
373
  if (init_a != init_b)
374
    return false;
375
 
376
  /* We have a read-write dependence.  Check that the load is before the store.
377
     When we vectorize basic blocks, vector load can be only before
378
     corresponding scalar load, and vector store can be only after its
379
     corresponding scalar store.  So the order of the acceses is preserved in
380
     case the load is before the store.  */
381
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383
    return false;
384
 
385
  return true;
386
}
387
 
388
 
389
/* Function vect_check_interleaving.
390
 
391
   Check if DRA and DRB are a part of interleaving.  In case they are, insert
392
   DRA and DRB in an interleaving chain.  */
393
 
394
static bool
395
vect_check_interleaving (struct data_reference *dra,
396
                         struct data_reference *drb)
397
{
398
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
399
 
400
  /* Check that the data-refs have same first location (except init) and they
401
     are both either store or load (not load and store).  */
402
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403
      || !dr_equal_offsets_p (dra, drb)
404
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405
      || DR_IS_READ (dra) != DR_IS_READ (drb))
406
    return false;
407
 
408
  /* Check:
409
     1. data-refs are of the same type
410
     2. their steps are equal
411
     3. the step (if greater than zero) is greater than the difference between
412
        data-refs' inits.  */
413
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
415
 
416
  if (type_size_a != type_size_b
417
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419
                              TREE_TYPE (DR_REF (drb))))
420
    return false;
421
 
422
  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424
  step = TREE_INT_CST_LOW (DR_STEP (dra));
425
 
426
  if (init_a > init_b)
427
    {
428
      /* If init_a == init_b + the size of the type * k, we have an interleaving,
429
         and DRB is accessed before DRA.  */
430
      diff_mod_size = (init_a - init_b) % type_size_a;
431
 
432
      if (step && (init_a - init_b) > step)
433
         return false;
434
 
435
      if (diff_mod_size == 0)
436
        {
437
          vect_update_interleaving_chain (drb, dra);
438
          if (vect_print_dump_info (REPORT_DR_DETAILS))
439
            {
440
              fprintf (vect_dump, "Detected interleaving ");
441
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442
              fprintf (vect_dump, " and ");
443
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
444
            }
445
          return true;
446
        }
447
    }
448
  else
449
    {
450
      /* If init_b == init_a + the size of the type * k, we have an
451
         interleaving, and DRA is accessed before DRB.  */
452
      diff_mod_size = (init_b - init_a) % type_size_a;
453
 
454
      if (step && (init_b - init_a) > step)
455
         return false;
456
 
457
      if (diff_mod_size == 0)
458
        {
459
          vect_update_interleaving_chain (dra, drb);
460
          if (vect_print_dump_info (REPORT_DR_DETAILS))
461
            {
462
              fprintf (vect_dump, "Detected interleaving ");
463
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464
              fprintf (vect_dump, " and ");
465
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
466
            }
467
          return true;
468
        }
469
    }
470
 
471
  return false;
472
}
473
 
474
/* Check if data references pointed by DR_I and DR_J are same or
475
   belong to same interleaving group.  Return FALSE if drs are
476
   different, otherwise return TRUE.  */
477
 
478
static bool
479
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
480
{
481
  gimple stmt_i = DR_STMT (dr_i);
482
  gimple stmt_j = DR_STMT (dr_j);
483
 
484
  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485
      || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486
            && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487
            && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488
                == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489
    return true;
490
  else
491
    return false;
492
}
493
 
494
/* If address ranges represented by DDR_I and DDR_J are equal,
495
   return TRUE, otherwise return FALSE.  */
496
 
497
static bool
498
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
499
{
500
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504
    return true;
505
  else
506
    return false;
507
}
508
 
509
/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510
   tested at run-time.  Return TRUE if DDR was successfully inserted.
511
   Return false if versioning is not supported.  */
512
 
513
static bool
514
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
515
{
516
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
517
 
518
  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519
    return false;
520
 
521
  if (vect_print_dump_info (REPORT_DR_DETAILS))
522
    {
523
      fprintf (vect_dump, "mark for run-time aliasing test between ");
524
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525
      fprintf (vect_dump, " and ");
526
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
527
    }
528
 
529
  if (optimize_loop_nest_for_size_p (loop))
530
    {
531
      if (vect_print_dump_info (REPORT_DR_DETAILS))
532
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
533
      return false;
534
    }
535
 
536
  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
537
  if (loop->inner)
538
    {
539
      if (vect_print_dump_info (REPORT_DR_DETAILS))
540
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541
      return false;
542
    }
543
 
544
  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
545
  return true;
546
}
547
 
548
 
549
/* Function vect_analyze_data_ref_dependence.
550
 
551
   Return TRUE if there (might) exist a dependence between a memory-reference
552
   DRA and a memory-reference DRB.  When versioning for alias may check a
553
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
554
   the data dependence.  */
555
 
556
static bool
557
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
558
                                  loop_vec_info loop_vinfo, int *max_vf)
559
{
560
  unsigned int i;
561
  struct loop *loop = NULL;
562
  struct data_reference *dra = DDR_A (ddr);
563
  struct data_reference *drb = DDR_B (ddr);
564
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
565
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
566
  lambda_vector dist_v;
567
  unsigned int loop_depth;
568
 
569
  /* Don't bother to analyze statements marked as unvectorizable.  */
570
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
571
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
572
    return false;
573
 
574
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
575
    {
576
      /* Independent data accesses.  */
577
      vect_check_interleaving (dra, drb);
578
      return false;
579
    }
580
 
581
  if (loop_vinfo)
582
    loop = LOOP_VINFO_LOOP (loop_vinfo);
583
 
584
  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
585
    return false;
586
 
587
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
588
    {
589
      gimple earlier_stmt;
590
 
591
      if (loop_vinfo)
592
        {
593
          if (vect_print_dump_info (REPORT_DR_DETAILS))
594
            {
595
              fprintf (vect_dump, "versioning for alias required: "
596
                                  "can't determine dependence between ");
597
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
598
              fprintf (vect_dump, " and ");
599
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
600
            }
601
 
602
          /* Add to list of ddrs that need to be tested at run-time.  */
603
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
604
        }
605
 
606
      /* When vectorizing a basic block unknown depnedence can still mean
607
         strided access.  */
608
      if (vect_check_interleaving (dra, drb))
609
         return false;
610
 
611
      /* Read-read is OK (we need this check here, after checking for
612
         interleaving).  */
613
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
614
        return false;
615
 
616
      if (vect_print_dump_info (REPORT_DR_DETAILS))
617
        {
618
          fprintf (vect_dump, "can't determine dependence between ");
619
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
620
          fprintf (vect_dump, " and ");
621
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
622
        }
623
 
624
      /* We do not vectorize basic blocks with write-write dependencies.  */
625
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
626
        return true;
627
 
628
      /* Check that it's not a load-after-store dependence.  */
629
      earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
630
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
631
        return true;
632
 
633
      return false;
634
    }
635
 
636
  /* Versioning for alias is not yet supported for basic block SLP, and
637
     dependence distance is unapplicable, hence, in case of known data
638
     dependence, basic block vectorization is impossible for now.  */
639
  if (!loop_vinfo)
640
    {
641
      if (dra != drb && vect_check_interleaving (dra, drb))
642
        return false;
643
 
644
      if (vect_print_dump_info (REPORT_DR_DETAILS))
645
        {
646
          fprintf (vect_dump, "determined dependence between ");
647
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
648
          fprintf (vect_dump, " and ");
649
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
650
        }
651
 
652
      /* Do not vectorize basic blcoks with write-write dependences.  */
653
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
654
        return true;
655
 
656
      /* Check if this dependence is allowed in basic block vectorization.  */
657
      return vect_drs_dependent_in_basic_block (dra, drb);
658
    }
659
 
660
  /* Loop-based vectorization and known data dependence.  */
661
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
662
    {
663
      if (vect_print_dump_info (REPORT_DR_DETAILS))
664
        {
665
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
666
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
667
          fprintf (vect_dump, " and ");
668
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
669
        }
670
      /* Add to list of ddrs that need to be tested at run-time.  */
671
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
672
    }
673
 
674
  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
675
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
676
    {
677
      int dist = dist_v[loop_depth];
678
 
679
      if (vect_print_dump_info (REPORT_DR_DETAILS))
680
        fprintf (vect_dump, "dependence distance  = %d.", dist);
681
 
682
      if (dist == 0)
683
        {
684
          if (vect_print_dump_info (REPORT_DR_DETAILS))
685
            {
686
              fprintf (vect_dump, "dependence distance == 0 between ");
687
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
688
              fprintf (vect_dump, " and ");
689
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
690
            }
691
 
692
          /* For interleaving, mark that there is a read-write dependency if
693
             necessary. We check before that one of the data-refs is store.  */
694
          if (DR_IS_READ (dra))
695
            GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
696
          else
697
            {
698
              if (DR_IS_READ (drb))
699
                GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
700
            }
701
 
702
          continue;
703
        }
704
 
705
      if (dist > 0 && DDR_REVERSED_P (ddr))
706
        {
707
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
708
             reversed (to make distance vector positive), and the actual
709
             distance is negative.  */
710
          if (vect_print_dump_info (REPORT_DR_DETAILS))
711
            fprintf (vect_dump, "dependence distance negative.");
712
          continue;
713
        }
714
 
715
      if (abs (dist) >= 2
716
          && abs (dist) < *max_vf)
717
        {
718
          /* The dependence distance requires reduction of the maximal
719
             vectorization factor.  */
720
          *max_vf = abs (dist);
721
          if (vect_print_dump_info (REPORT_DR_DETAILS))
722
            fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
723
                     *max_vf);
724
        }
725
 
726
      if (abs (dist) >= *max_vf)
727
        {
728
          /* Dependence distance does not create dependence, as far as
729
             vectorization is concerned, in this case.  */
730
          if (vect_print_dump_info (REPORT_DR_DETAILS))
731
            fprintf (vect_dump, "dependence distance >= VF.");
732
          continue;
733
        }
734
 
735
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
736
        {
737
          fprintf (vect_dump, "not vectorized, possible dependence "
738
                              "between data-refs ");
739
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
740
          fprintf (vect_dump, " and ");
741
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
742
        }
743
 
744
      return true;
745
    }
746
 
747
  return false;
748
}
749
 
750
/* Function vect_analyze_data_ref_dependences.
751
 
752
   Examine all the data references in the loop, and make sure there do not
753
   exist any data dependences between them.  Set *MAX_VF according to
754
   the maximum vectorization factor the data dependences allow.  */
755
 
756
bool
757
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
758
                                   bb_vec_info bb_vinfo, int *max_vf)
759
{
760
  unsigned int i;
761
  VEC (ddr_p, heap) *ddrs = NULL;
762
  struct data_dependence_relation *ddr;
763
 
764
  if (vect_print_dump_info (REPORT_DETAILS))
765
    fprintf (vect_dump, "=== vect_analyze_dependences ===");
766
 
767
  if (loop_vinfo)
768
    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
769
  else
770
    ddrs = BB_VINFO_DDRS (bb_vinfo);
771
 
772
  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
773
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
774
      return false;
775
 
776
  return true;
777
}
778
 
779
 
780
/* Function vect_compute_data_ref_alignment
781
 
782
   Compute the misalignment of the data reference DR.
783
 
784
   Output:
785
   1. If during the misalignment computation it is found that the data reference
786
      cannot be vectorized then false is returned.
787
   2. DR_MISALIGNMENT (DR) is defined.
788
 
789
   FOR NOW: No analysis is actually performed. Misalignment is calculated
790
   only for trivial cases. TODO.  */
791
 
792
static bool
793
vect_compute_data_ref_alignment (struct data_reference *dr)
794
{
795
  gimple stmt = DR_STMT (dr);
796
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
797
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
798
  struct loop *loop = NULL;
799
  tree ref = DR_REF (dr);
800
  tree vectype;
801
  tree base, base_addr;
802
  bool base_aligned;
803
  tree misalign;
804
  tree aligned_to, alignment;
805
 
806
  if (vect_print_dump_info (REPORT_DETAILS))
807
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");
808
 
809
  if (loop_vinfo)
810
    loop = LOOP_VINFO_LOOP (loop_vinfo);
811
 
812
  /* Initialize misalignment to unknown.  */
813
  SET_DR_MISALIGNMENT (dr, -1);
814
 
815
  misalign = DR_INIT (dr);
816
  aligned_to = DR_ALIGNED_TO (dr);
817
  base_addr = DR_BASE_ADDRESS (dr);
818
  vectype = STMT_VINFO_VECTYPE (stmt_info);
819
 
820
  /* In case the dataref is in an inner-loop of the loop that is being
821
     vectorized (LOOP), we use the base and misalignment information
822
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
823
     stays the same throughout the execution of the inner-loop, which is why
824
     we have to check that the stride of the dataref in the inner-loop evenly
825
     divides by the vector size.  */
826
  if (loop && nested_in_vect_loop_p (loop, stmt))
827
    {
828
      tree step = DR_STEP (dr);
829
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
830
 
831
      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
832
        {
833
          if (vect_print_dump_info (REPORT_ALIGNMENT))
834
            fprintf (vect_dump, "inner step divides the vector-size.");
835
          misalign = STMT_VINFO_DR_INIT (stmt_info);
836
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
837
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
838
        }
839
      else
840
        {
841
          if (vect_print_dump_info (REPORT_ALIGNMENT))
842
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
843
          misalign = NULL_TREE;
844
        }
845
    }
846
 
847
  base = build_fold_indirect_ref (base_addr);
848
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
849
 
850
  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
851
      || !misalign)
852
    {
853
      if (vect_print_dump_info (REPORT_ALIGNMENT))
854
        {
855
          fprintf (vect_dump, "Unknown alignment for access: ");
856
          print_generic_expr (vect_dump, base, TDF_SLIM);
857
        }
858
      return true;
859
    }
860
 
861
  if ((DECL_P (base)
862
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
863
                                alignment) >= 0)
864
      || (TREE_CODE (base_addr) == SSA_NAME
865
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
866
                                                      TREE_TYPE (base_addr)))),
867
                                   alignment) >= 0)
868
      || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
869
    base_aligned = true;
870
  else
871
    base_aligned = false;
872
 
873
  if (!base_aligned)
874
    {
875
      /* Do not change the alignment of global variables if
876
         flag_section_anchors is enabled.  */
877
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
878
          || (TREE_STATIC (base) && flag_section_anchors))
879
        {
880
          if (vect_print_dump_info (REPORT_DETAILS))
881
            {
882
              fprintf (vect_dump, "can't force alignment of ref: ");
883
              print_generic_expr (vect_dump, ref, TDF_SLIM);
884
            }
885
          return true;
886
        }
887
 
888
      /* Force the alignment of the decl.
889
         NOTE: This is the only change to the code we make during
890
         the analysis phase, before deciding to vectorize the loop.  */
891
      if (vect_print_dump_info (REPORT_DETAILS))
892
        {
893
          fprintf (vect_dump, "force alignment of ");
894
          print_generic_expr (vect_dump, ref, TDF_SLIM);
895
        }
896
 
897
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
898
      DECL_USER_ALIGN (base) = 1;
899
    }
900
 
901
  /* At this point we assume that the base is aligned.  */
902
  gcc_assert (base_aligned
903
              || (TREE_CODE (base) == VAR_DECL
904
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
905
 
906
  /* If this is a backward running DR then first access in the larger
907
     vectype actually is N-1 elements before the address in the DR.
908
     Adjust misalign accordingly.  */
909
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
910
    {
911
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
912
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
913
         otherwise we wouldn't be here.  */
914
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
915
      /* PLUS because DR_STEP was negative.  */
916
      misalign = size_binop (PLUS_EXPR, misalign, offset);
917
    }
918
 
919
  /* Modulo alignment.  */
920
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
921
 
922
  if (!host_integerp (misalign, 1))
923
    {
924
      /* Negative or overflowed misalignment value.  */
925
      if (vect_print_dump_info (REPORT_DETAILS))
926
        fprintf (vect_dump, "unexpected misalign value");
927
      return false;
928
    }
929
 
930
  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
931
 
932
  if (vect_print_dump_info (REPORT_DETAILS))
933
    {
934
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
935
      print_generic_expr (vect_dump, ref, TDF_SLIM);
936
    }
937
 
938
  return true;
939
}
940
 
941
 
942
/* Function vect_compute_data_refs_alignment
943
 
944
   Compute the misalignment of data references in the loop.
945
   Return FALSE if a data reference is found that cannot be vectorized.  */
946
 
947
static bool
948
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
949
                                  bb_vec_info bb_vinfo)
950
{
951
  VEC (data_reference_p, heap) *datarefs;
952
  struct data_reference *dr;
953
  unsigned int i;
954
 
955
  if (loop_vinfo)
956
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
957
  else
958
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
959
 
960
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
961
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
962
        && !vect_compute_data_ref_alignment (dr))
963
      {
964
        if (bb_vinfo)
965
          {
966
            /* Mark unsupported statement as unvectorizable.  */
967
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
968
            continue;
969
          }
970
        else
971
          return false;
972
      }
973
 
974
  return true;
975
}
976
 
977
 
978
/* Function vect_update_misalignment_for_peel
979
 
980
   DR - the data reference whose misalignment is to be adjusted.
981
   DR_PEEL - the data reference whose misalignment is being made
982
             zero in the vector loop by the peel.
983
   NPEEL - the number of iterations in the peel loop if the misalignment
984
           of DR_PEEL is known at compile time.  */
985
 
986
static void
987
vect_update_misalignment_for_peel (struct data_reference *dr,
988
                                   struct data_reference *dr_peel, int npeel)
989
{
990
  unsigned int i;
991
  VEC(dr_p,heap) *same_align_drs;
992
  struct data_reference *current_dr;
993
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
994
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
995
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
996
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
997
 
998
 /* For interleaved data accesses the step in the loop must be multiplied by
999
     the size of the interleaving group.  */
1000
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1001
    dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1002
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1003
    dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1004
 
1005
  /* It can be assumed that the data refs with the same alignment as dr_peel
1006
     are aligned in the vector loop.  */
1007
  same_align_drs
1008
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1009
  FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1010
    {
1011
      if (current_dr != dr)
1012
        continue;
1013
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1014
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1015
      SET_DR_MISALIGNMENT (dr, 0);
1016
      return;
1017
    }
1018
 
1019
  if (known_alignment_for_access_p (dr)
1020
      && known_alignment_for_access_p (dr_peel))
1021
    {
1022
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1023
      int misal = DR_MISALIGNMENT (dr);
1024
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1025
      misal += negative ? -npeel * dr_size : npeel * dr_size;
1026
      misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1027
      SET_DR_MISALIGNMENT (dr, misal);
1028
      return;
1029
    }
1030
 
1031
  if (vect_print_dump_info (REPORT_DETAILS))
1032
    fprintf (vect_dump, "Setting misalignment to -1.");
1033
  SET_DR_MISALIGNMENT (dr, -1);
1034
}
1035
 
1036
 
1037
/* Function vect_verify_datarefs_alignment
1038
 
1039
   Return TRUE if all data references in the loop can be
1040
   handled with respect to alignment.  */
1041
 
1042
bool
1043
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1044
{
1045
  VEC (data_reference_p, heap) *datarefs;
1046
  struct data_reference *dr;
1047
  enum dr_alignment_support supportable_dr_alignment;
1048
  unsigned int i;
1049
 
1050
  if (loop_vinfo)
1051
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1052
  else
1053
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1054
 
1055
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1056
    {
1057
      gimple stmt = DR_STMT (dr);
1058
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1059
 
1060
      /* For interleaving, only the alignment of the first access matters.
1061
         Skip statements marked as not vectorizable.  */
1062
      if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1063
           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1064
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
1065
        continue;
1066
 
1067
      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1068
      if (!supportable_dr_alignment)
1069
        {
1070
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1071
            {
1072
              if (DR_IS_READ (dr))
1073
                fprintf (vect_dump,
1074
                         "not vectorized: unsupported unaligned load.");
1075
              else
1076
                fprintf (vect_dump,
1077
                         "not vectorized: unsupported unaligned store.");
1078
 
1079
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1080
            }
1081
          return false;
1082
        }
1083
      if (supportable_dr_alignment != dr_aligned
1084
          && vect_print_dump_info (REPORT_ALIGNMENT))
1085
        fprintf (vect_dump, "Vectorizing an unaligned access.");
1086
    }
1087
  return true;
1088
}
1089
 
1090
 
1091
/* Function vector_alignment_reachable_p
1092
 
1093
   Return true if vector alignment for DR is reachable by peeling
1094
   a few loop iterations.  Return false otherwise.  */
1095
 
1096
static bool
1097
vector_alignment_reachable_p (struct data_reference *dr)
1098
{
1099
  gimple stmt = DR_STMT (dr);
1100
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1101
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1102
 
1103
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1104
    {
1105
      /* For interleaved access we peel only if number of iterations in
1106
         the prolog loop ({VF - misalignment}), is a multiple of the
1107
         number of the interleaved accesses.  */
1108
      int elem_size, mis_in_elements;
1109
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1110
 
1111
      /* FORNOW: handle only known alignment.  */
1112
      if (!known_alignment_for_access_p (dr))
1113
        return false;
1114
 
1115
      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1116
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1117
 
1118
      if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1119
        return false;
1120
    }
1121
 
1122
  /* If misalignment is known at the compile time then allow peeling
1123
     only if natural alignment is reachable through peeling.  */
1124
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1125
    {
1126
      HOST_WIDE_INT elmsize =
1127
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1128
      if (vect_print_dump_info (REPORT_DETAILS))
1129
        {
1130
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1131
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1132
        }
1133
      if (DR_MISALIGNMENT (dr) % elmsize)
1134
        {
1135
          if (vect_print_dump_info (REPORT_DETAILS))
1136
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
1137
          return false;
1138
        }
1139
    }
1140
 
1141
  if (!known_alignment_for_access_p (dr))
1142
    {
1143
      tree type = (TREE_TYPE (DR_REF (dr)));
1144
      tree ba = DR_BASE_OBJECT (dr);
1145
      bool is_packed = false;
1146
 
1147
      if (ba)
1148
        is_packed = contains_packed_reference (ba);
1149
 
1150
      if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1151
        is_packed = true;
1152
 
1153
      if (vect_print_dump_info (REPORT_DETAILS))
1154
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1155
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1156
        return true;
1157
      else
1158
        return false;
1159
    }
1160
 
1161
  return true;
1162
}
1163
 
1164
 
1165
/* Calculate the cost of the memory access represented by DR.  */
1166
 
1167
static void
1168
vect_get_data_access_cost (struct data_reference *dr,
1169
                           unsigned int *inside_cost,
1170
                           unsigned int *outside_cost)
1171
{
1172
  gimple stmt = DR_STMT (dr);
1173
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1174
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1175
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1176
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1177
  int ncopies = vf / nunits;
1178
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1179
 
1180
  if (!supportable_dr_alignment)
1181
    *inside_cost = VECT_MAX_COST;
1182
  else
1183
    {
1184
      if (DR_IS_READ (dr))
1185
        vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1186
      else
1187
        vect_get_store_cost (dr, ncopies, inside_cost);
1188
    }
1189
 
1190
  if (vect_print_dump_info (REPORT_COST))
1191
    fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1192
             "outside_cost = %d.", *inside_cost, *outside_cost);
1193
}
1194
 
1195
 
1196
static hashval_t
1197
vect_peeling_hash (const void *elem)
1198
{
1199
  const struct _vect_peel_info *peel_info;
1200
 
1201
  peel_info = (const struct _vect_peel_info *) elem;
1202
  return (hashval_t) peel_info->npeel;
1203
}
1204
 
1205
 
1206
static int
1207
vect_peeling_hash_eq (const void *elem1, const void *elem2)
1208
{
1209
  const struct _vect_peel_info *a, *b;
1210
 
1211
  a = (const struct _vect_peel_info *) elem1;
1212
  b = (const struct _vect_peel_info *) elem2;
1213
  return (a->npeel == b->npeel);
1214
}
1215
 
1216
 
1217
/* Insert DR into peeling hash table with NPEEL as key.  */
1218
 
1219
static void
1220
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1221
                          int npeel)
1222
{
1223
  struct _vect_peel_info elem, *slot;
1224
  void **new_slot;
1225
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1226
 
1227
  elem.npeel = npeel;
1228
  slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1229
                                     &elem);
1230
  if (slot)
1231
    slot->count++;
1232
  else
1233
    {
1234
      slot = XNEW (struct _vect_peel_info);
1235
      slot->npeel = npeel;
1236
      slot->dr = dr;
1237
      slot->count = 1;
1238
      new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1239
                                 INSERT);
1240
      *new_slot = slot;
1241
    }
1242
 
1243
  if (!supportable_dr_alignment && !flag_vect_cost_model)
1244
    slot->count += VECT_MAX_COST;
1245
}
1246
 
1247
 
1248
/* Traverse peeling hash table to find peeling option that aligns maximum
1249
   number of data accesses.  */
1250
 
1251
static int
1252
vect_peeling_hash_get_most_frequent (void **slot, void *data)
1253
{
1254
  vect_peel_info elem = (vect_peel_info) *slot;
1255
  vect_peel_extended_info max = (vect_peel_extended_info) data;
1256
 
1257
  if (elem->count > max->peel_info.count
1258
      || (elem->count == max->peel_info.count
1259
          && max->peel_info.npeel > elem->npeel))
1260
    {
1261
      max->peel_info.npeel = elem->npeel;
1262
      max->peel_info.count = elem->count;
1263
      max->peel_info.dr = elem->dr;
1264
    }
1265
 
1266
  return 1;
1267
}
1268
 
1269
 
1270
/* Traverse peeling hash table and calculate cost for each peeling option.
1271
   Find the one with the lowest cost.  */
1272
 
1273
static int
1274
vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1275
{
1276
  vect_peel_info elem = (vect_peel_info) *slot;
1277
  vect_peel_extended_info min = (vect_peel_extended_info) data;
1278
  int save_misalignment, dummy;
1279
  unsigned int inside_cost = 0, outside_cost = 0, i;
1280
  gimple stmt = DR_STMT (elem->dr);
1281
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1282
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1283
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1284
  struct data_reference *dr;
1285
 
1286
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1287
    {
1288
      stmt = DR_STMT (dr);
1289
      stmt_info = vinfo_for_stmt (stmt);
1290
      /* For interleaving, only the alignment of the first access
1291
         matters.  */
1292
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1293
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1294
        continue;
1295
 
1296
      save_misalignment = DR_MISALIGNMENT (dr);
1297
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1298
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1299
      SET_DR_MISALIGNMENT (dr, save_misalignment);
1300
    }
1301
 
1302
  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1303
                         vect_get_single_scalar_iteraion_cost (loop_vinfo));
1304
 
1305
  if (inside_cost < min->inside_cost
1306
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1307
    {
1308
      min->inside_cost = inside_cost;
1309
      min->outside_cost = outside_cost;
1310
      min->peel_info.dr = elem->dr;
1311
      min->peel_info.npeel = elem->npeel;
1312
    }
1313
 
1314
  return 1;
1315
}
1316
 
1317
 
1318
/* Choose best peeling option by traversing peeling hash table and either
1319
   choosing an option with the lowest cost (if cost model is enabled) or the
1320
   option that aligns as many accesses as possible.  */
1321
 
1322
static struct data_reference *
1323
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1324
                                       unsigned int *npeel)
1325
{
1326
   struct _vect_peel_extended_info res;
1327
 
1328
   res.peel_info.dr = NULL;
1329
 
1330
   if (flag_vect_cost_model)
1331
     {
1332
       res.inside_cost = INT_MAX;
1333
       res.outside_cost = INT_MAX;
1334
       htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1335
                      vect_peeling_hash_get_lowest_cost, &res);
1336
     }
1337
   else
1338
     {
1339
       res.peel_info.count = 0;
1340
       htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1341
                      vect_peeling_hash_get_most_frequent, &res);
1342
     }
1343
 
1344
   *npeel = res.peel_info.npeel;
1345
   return res.peel_info.dr;
1346
}
1347
 
1348
 
1349
/* Function vect_enhance_data_refs_alignment
1350
 
1351
   This pass will use loop versioning and loop peeling in order to enhance
1352
   the alignment of data references in the loop.
1353
 
1354
   FOR NOW: we assume that whatever versioning/peeling takes place, only the
1355
   original loop is to be vectorized.  Any other loops that are created by
1356
   the transformations performed in this pass - are not supposed to be
1357
   vectorized.  This restriction will be relaxed.
1358
 
1359
   This pass will require a cost model to guide it whether to apply peeling
1360
   or versioning or a combination of the two.  For example, the scheme that
1361
   intel uses when given a loop with several memory accesses, is as follows:
1362
   choose one memory access ('p') which alignment you want to force by doing
1363
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
1364
   other accesses are not necessarily aligned, or (2) use loop versioning to
1365
   generate one loop in which all accesses are aligned, and another loop in
1366
   which only 'p' is necessarily aligned.
1367
 
1368
   ("Automatic Intra-Register Vectorization for the Intel Architecture",
1369
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1370
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1371
 
1372
   Devising a cost model is the most critical aspect of this work.  It will
1373
   guide us on which access to peel for, whether to use loop versioning, how
1374
   many versions to create, etc.  The cost model will probably consist of
1375
   generic considerations as well as target specific considerations (on
1376
   powerpc for example, misaligned stores are more painful than misaligned
1377
   loads).
1378
 
1379
   Here are the general steps involved in alignment enhancements:
1380
 
1381
     -- original loop, before alignment analysis:
1382
        for (i=0; i<N; i++){
1383
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
1384
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1385
        }
1386
 
1387
     -- After vect_compute_data_refs_alignment:
1388
        for (i=0; i<N; i++){
1389
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
1390
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1391
        }
1392
 
1393
     -- Possibility 1: we do loop versioning:
1394
     if (p is aligned) {
1395
        for (i=0; i<N; i++){    # loop 1A
1396
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
1397
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1398
        }
1399
     }
1400
     else {
1401
        for (i=0; i<N; i++){    # loop 1B
1402
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
1403
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1404
        }
1405
     }
1406
 
1407
     -- Possibility 2: we do loop peeling:
1408
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1409
        x = q[i];
1410
        p[i] = y;
1411
     }
1412
     for (i = 3; i < N; i++){   # loop 2A
1413
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
1414
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
1415
     }
1416
 
1417
     -- Possibility 3: combination of loop peeling and versioning:
1418
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1419
        x = q[i];
1420
        p[i] = y;
1421
     }
1422
     if (p is aligned) {
1423
        for (i = 3; i<N; i++){  # loop 3A
1424
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
1425
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1426
        }
1427
     }
1428
     else {
1429
        for (i = 3; i<N; i++){  # loop 3B
1430
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
1431
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1432
        }
1433
     }
1434
 
1435
     These loops are later passed to loop_transform to be vectorized.  The
1436
     vectorizer will use the alignment information to guide the transformation
1437
     (whether to generate regular loads/stores, or with special handling for
1438
     misalignment).  */
1439
 
1440
bool
1441
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1442
{
1443
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1444
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1445
  enum dr_alignment_support supportable_dr_alignment;
1446
  struct data_reference *dr0 = NULL, *first_store = NULL;
1447
  struct data_reference *dr;
1448
  unsigned int i, j;
1449
  bool do_peeling = false;
1450
  bool do_versioning = false;
1451
  bool stat;
1452
  gimple stmt;
1453
  stmt_vec_info stmt_info;
1454
  int vect_versioning_for_alias_required;
1455
  unsigned int npeel = 0;
1456
  bool all_misalignments_unknown = true;
1457
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1458
  unsigned possible_npeel_number = 1;
1459
  tree vectype;
1460
  unsigned int nelements, mis, same_align_drs_max = 0;
1461
 
1462
  if (vect_print_dump_info (REPORT_DETAILS))
1463
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1464
 
1465
  /* While cost model enhancements are expected in the future, the high level
1466
     view of the code at this time is as follows:
1467
 
1468
     A) If there is a misaligned access then see if peeling to align
1469
        this access can make all data references satisfy
1470
        vect_supportable_dr_alignment.  If so, update data structures
1471
        as needed and return true.
1472
 
1473
     B) If peeling wasn't possible and there is a data reference with an
1474
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
1475
        then see if loop versioning checks can be used to make all data
1476
        references satisfy vect_supportable_dr_alignment.  If so, update
1477
        data structures as needed and return true.
1478
 
1479
     C) If neither peeling nor versioning were successful then return false if
1480
        any data reference does not satisfy vect_supportable_dr_alignment.
1481
 
1482
     D) Return true (all data references satisfy vect_supportable_dr_alignment).
1483
 
1484
     Note, Possibility 3 above (which is peeling and versioning together) is not
1485
     being done at this time.  */
1486
 
1487
  /* (1) Peeling to force alignment.  */
1488
 
1489
  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1490
     Considerations:
1491
     + How many accesses will become aligned due to the peeling
1492
     - How many accesses will become unaligned due to the peeling,
1493
       and the cost of misaligned accesses.
1494
     - The cost of peeling (the extra runtime checks, the increase
1495
       in code size).  */
1496
 
1497
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1498
    {
1499
      stmt = DR_STMT (dr);
1500
      stmt_info = vinfo_for_stmt (stmt);
1501
 
1502
      if (!STMT_VINFO_RELEVANT (stmt_info))
1503
        continue;
1504
 
1505
      /* For interleaving, only the alignment of the first access
1506
         matters.  */
1507
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1508
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1509
        continue;
1510
 
1511
      /* For invariant accesses there is nothing to enhance.  */
1512
      if (integer_zerop (DR_STEP (dr)))
1513
        continue;
1514
 
1515
      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1516
      do_peeling = vector_alignment_reachable_p (dr);
1517
      if (do_peeling)
1518
        {
1519
          if (known_alignment_for_access_p (dr))
1520
            {
1521
              unsigned int npeel_tmp;
1522
              bool negative = tree_int_cst_compare (DR_STEP (dr),
1523
                                                    size_zero_node) < 0;
1524
 
1525
              /* Save info about DR in the hash table.  */
1526
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1527
                LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1528
                           htab_create (1, vect_peeling_hash,
1529
                                        vect_peeling_hash_eq, free);
1530
 
1531
              vectype = STMT_VINFO_VECTYPE (stmt_info);
1532
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
1533
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1534
                                                TREE_TYPE (DR_REF (dr))));
1535
              npeel_tmp = (negative
1536
                           ? (mis - nelements) : (nelements - mis))
1537
                  & (nelements - 1);
1538
 
1539
              /* For multiple types, it is possible that the bigger type access
1540
                 will have more than one peeling option.  E.g., a loop with two
1541
                 types: one of size (vector size / 4), and the other one of
1542
                 size (vector size / 8).  Vectorization factor will 8.  If both
1543
                 access are misaligned by 3, the first one needs one scalar
1544
                 iteration to be aligned, and the second one needs 5.  But the
1545
                 the first one will be aligned also by peeling 5 scalar
1546
                 iterations, and in that case both accesses will be aligned.
1547
                 Hence, except for the immediate peeling amount, we also want
1548
                 to try to add full vector size, while we don't exceed
1549
                 vectorization factor.
1550
                 We do this automtically for cost model, since we calculate cost
1551
                 for every peeling option.  */
1552
              if (!flag_vect_cost_model)
1553
                possible_npeel_number = vf /nelements;
1554
 
1555
              /* Handle the aligned case. We may decide to align some other
1556
                 access, making DR unaligned.  */
1557
              if (DR_MISALIGNMENT (dr) == 0)
1558
                {
1559
                  npeel_tmp = 0;
1560
                  if (!flag_vect_cost_model)
1561
                    possible_npeel_number++;
1562
                }
1563
 
1564
              for (j = 0; j < possible_npeel_number; j++)
1565
                {
1566
                  gcc_assert (npeel_tmp <= vf);
1567
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1568
                  npeel_tmp += nelements;
1569
                }
1570
 
1571
              all_misalignments_unknown = false;
1572
              /* Data-ref that was chosen for the case that all the
1573
                 misalignments are unknown is not relevant anymore, since we
1574
                 have a data-ref with known alignment.  */
1575
              dr0 = NULL;
1576
            }
1577
          else
1578
            {
1579
              /* If we don't know all the misalignment values, we prefer
1580
                 peeling for data-ref that has maximum number of data-refs
1581
                 with the same alignment, unless the target prefers to align
1582
                 stores over load.  */
1583
              if (all_misalignments_unknown)
1584
                {
1585
                  if (same_align_drs_max  < VEC_length (dr_p,
1586
                                       STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1587
                      || !dr0)
1588
                    {
1589
                      same_align_drs_max = VEC_length (dr_p,
1590
                                       STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1591
                      dr0 = dr;
1592
                    }
1593
 
1594
                  if (!first_store && DR_IS_WRITE (dr))
1595
                    first_store = dr;
1596
                }
1597
 
1598
              /* If there are both known and unknown misaligned accesses in the
1599
                 loop, we choose peeling amount according to the known
1600
                 accesses.  */
1601
 
1602
 
1603
              if (!supportable_dr_alignment)
1604
                {
1605
                  dr0 = dr;
1606
                  if (!first_store && DR_IS_WRITE (dr))
1607
                    first_store = dr;
1608
                }
1609
            }
1610
        }
1611
      else
1612
        {
1613
          if (!aligned_access_p (dr))
1614
            {
1615
              if (vect_print_dump_info (REPORT_DETAILS))
1616
                fprintf (vect_dump, "vector alignment may not be reachable");
1617
 
1618
              break;
1619
            }
1620
        }
1621
    }
1622
 
1623
  vect_versioning_for_alias_required
1624
    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1625
 
1626
  /* Temporarily, if versioning for alias is required, we disable peeling
1627
     until we support peeling and versioning.  Often peeling for alignment
1628
     will require peeling for loop-bound, which in turn requires that we
1629
     know how to adjust the loop ivs after the loop.  */
1630
  if (vect_versioning_for_alias_required
1631
      || !vect_can_advance_ivs_p (loop_vinfo)
1632
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1633
    do_peeling = false;
1634
 
1635
  if (do_peeling && all_misalignments_unknown
1636
      && vect_supportable_dr_alignment (dr0, false))
1637
    {
1638
 
1639
      /* Check if the target requires to prefer stores over loads, i.e., if
1640
         misaligned stores are more expensive than misaligned loads (taking
1641
         drs with same alignment into account).  */
1642
      if (first_store && DR_IS_READ (dr0))
1643
        {
1644
          unsigned int load_inside_cost = 0, load_outside_cost = 0;
1645
          unsigned int store_inside_cost = 0, store_outside_cost = 0;
1646
          unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1647
          unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1648
 
1649
          vect_get_data_access_cost (dr0, &load_inside_cost,
1650
                                     &load_outside_cost);
1651
          vect_get_data_access_cost (first_store, &store_inside_cost,
1652
                                     &store_outside_cost);
1653
 
1654
          /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1655
             aligning the load DR0).  */
1656
          load_inside_penalty = store_inside_cost;
1657
          load_outside_penalty = store_outside_cost;
1658
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1659
                                   (vinfo_for_stmt (DR_STMT (first_store))),
1660
                                   i, dr);
1661
               i++)
1662
            if (DR_IS_READ (dr))
1663
              {
1664
                load_inside_penalty += load_inside_cost;
1665
                load_outside_penalty += load_outside_cost;
1666
              }
1667
            else
1668
              {
1669
                load_inside_penalty += store_inside_cost;
1670
                load_outside_penalty += store_outside_cost;
1671
              }
1672
 
1673
          /* Calculate the penalty for leaving DR0 unaligned (by
1674
             aligning the FIRST_STORE).  */
1675
          store_inside_penalty = load_inside_cost;
1676
          store_outside_penalty = load_outside_cost;
1677
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1678
                                   (vinfo_for_stmt (DR_STMT (dr0))),
1679
                                   i, dr);
1680
               i++)
1681
            if (DR_IS_READ (dr))
1682
              {
1683
                store_inside_penalty += load_inside_cost;
1684
                store_outside_penalty += load_outside_cost;
1685
              }
1686
            else
1687
              {
1688
                store_inside_penalty += store_inside_cost;
1689
                store_outside_penalty += store_outside_cost;
1690
              }
1691
 
1692
          if (load_inside_penalty > store_inside_penalty
1693
              || (load_inside_penalty == store_inside_penalty
1694
                  && load_outside_penalty > store_outside_penalty))
1695
            dr0 = first_store;
1696
        }
1697
 
1698
      /* In case there are only loads with different unknown misalignments, use
1699
         peeling only if it may help to align other accesses in the loop.  */
1700
      if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1701
                                            (vinfo_for_stmt (DR_STMT (dr0))))
1702
          && vect_supportable_dr_alignment (dr0, false)
1703
              != dr_unaligned_supported)
1704
        do_peeling = false;
1705
    }
1706
 
1707
  if (do_peeling && !dr0)
1708
    {
1709
      /* Peeling is possible, but there is no data access that is not supported
1710
         unless aligned. So we try to choose the best possible peeling.  */
1711
 
1712
      /* We should get here only if there are drs with known misalignment.  */
1713
      gcc_assert (!all_misalignments_unknown);
1714
 
1715
      /* Choose the best peeling from the hash table.  */
1716
      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1717
      if (!dr0 || !npeel)
1718
        do_peeling = false;
1719
    }
1720
 
1721
  if (do_peeling)
1722
    {
1723
      stmt = DR_STMT (dr0);
1724
      stmt_info = vinfo_for_stmt (stmt);
1725
      vectype = STMT_VINFO_VECTYPE (stmt_info);
1726
      nelements = TYPE_VECTOR_SUBPARTS (vectype);
1727
 
1728
      if (known_alignment_for_access_p (dr0))
1729
        {
1730
          bool negative = tree_int_cst_compare (DR_STEP (dr0),
1731
                                                size_zero_node) < 0;
1732
          if (!npeel)
1733
            {
1734
              /* Since it's known at compile time, compute the number of
1735
                 iterations in the peeled loop (the peeling factor) for use in
1736
                 updating DR_MISALIGNMENT values.  The peeling factor is the
1737
                 vectorization factor minus the misalignment as an element
1738
                 count.  */
1739
              mis = DR_MISALIGNMENT (dr0);
1740
              mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1741
              npeel = ((negative ? mis - nelements : nelements - mis)
1742
                       & (nelements - 1));
1743
            }
1744
 
1745
          /* For interleaved data access every iteration accesses all the
1746
             members of the group, therefore we divide the number of iterations
1747
             by the group size.  */
1748
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1749
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1750
            npeel /= GROUP_SIZE (stmt_info);
1751
 
1752
          if (vect_print_dump_info (REPORT_DETAILS))
1753
            fprintf (vect_dump, "Try peeling by %d", npeel);
1754
        }
1755
 
1756
      /* Ensure that all data refs can be vectorized after the peel.  */
1757
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1758
        {
1759
          int save_misalignment;
1760
 
1761
          if (dr == dr0)
1762
            continue;
1763
 
1764
          stmt = DR_STMT (dr);
1765
          stmt_info = vinfo_for_stmt (stmt);
1766
          /* For interleaving, only the alignment of the first access
1767
            matters.  */
1768
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1769
              && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1770
            continue;
1771
 
1772
          save_misalignment = DR_MISALIGNMENT (dr);
1773
          vect_update_misalignment_for_peel (dr, dr0, npeel);
1774
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1775
          SET_DR_MISALIGNMENT (dr, save_misalignment);
1776
 
1777
          if (!supportable_dr_alignment)
1778
            {
1779
              do_peeling = false;
1780
              break;
1781
            }
1782
        }
1783
 
1784
      if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1785
        {
1786
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1787
          if (!stat)
1788
            do_peeling = false;
1789
          else
1790
            return stat;
1791
        }
1792
 
1793
      if (do_peeling)
1794
        {
1795
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1796
             If the misalignment of DR_i is identical to that of dr0 then set
1797
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
1798
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1799
             by the peeling factor times the element size of DR_i (MOD the
1800
             vectorization factor times the size).  Otherwise, the
1801
             misalignment of DR_i must be set to unknown.  */
1802
          FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1803
            if (dr != dr0)
1804
              vect_update_misalignment_for_peel (dr, dr0, npeel);
1805
 
1806
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1807
          if (npeel)
1808
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1809
          else
1810
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1811
          SET_DR_MISALIGNMENT (dr0, 0);
1812
          if (vect_print_dump_info (REPORT_ALIGNMENT))
1813
            fprintf (vect_dump, "Alignment of access forced using peeling.");
1814
 
1815
          if (vect_print_dump_info (REPORT_DETAILS))
1816
            fprintf (vect_dump, "Peeling for alignment will be applied.");
1817
 
1818
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1819
          gcc_assert (stat);
1820
          return stat;
1821
        }
1822
    }
1823
 
1824
 
1825
  /* (2) Versioning to force alignment.  */
1826
 
1827
  /* Try versioning if:
1828
     1) flag_tree_vect_loop_version is TRUE
1829
     2) optimize loop for speed
1830
     3) there is at least one unsupported misaligned data ref with an unknown
1831
        misalignment, and
1832
     4) all misaligned data refs with a known misalignment are supported, and
1833
     5) the number of runtime alignment checks is within reason.  */
1834
 
1835
  do_versioning =
1836
        flag_tree_vect_loop_version
1837
        && optimize_loop_nest_for_speed_p (loop)
1838
        && (!loop->inner); /* FORNOW */
1839
 
1840
  if (do_versioning)
1841
    {
1842
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1843
        {
1844
          stmt = DR_STMT (dr);
1845
          stmt_info = vinfo_for_stmt (stmt);
1846
 
1847
          /* For interleaving, only the alignment of the first access
1848
             matters.  */
1849
          if (aligned_access_p (dr)
1850
              || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1851
                  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1852
            continue;
1853
 
1854
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1855
 
1856
          if (!supportable_dr_alignment)
1857
            {
1858
              gimple stmt;
1859
              int mask;
1860
              tree vectype;
1861
 
1862
              if (known_alignment_for_access_p (dr)
1863
                  || VEC_length (gimple,
1864
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1865
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1866
                {
1867
                  do_versioning = false;
1868
                  break;
1869
                }
1870
 
1871
              stmt = DR_STMT (dr);
1872
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1873
              gcc_assert (vectype);
1874
 
1875
              /* The rightmost bits of an aligned address must be zeros.
1876
                 Construct the mask needed for this test.  For example,
1877
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1878
                 mask must be 15 = 0xf. */
1879
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1880
 
1881
              /* FORNOW: use the same mask to test all potentially unaligned
1882
                 references in the loop.  The vectorizer currently supports
1883
                 a single vector size, see the reference to
1884
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1885
                 vectorization factor is computed.  */
1886
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1887
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1888
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1889
              VEC_safe_push (gimple, heap,
1890
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1891
                             DR_STMT (dr));
1892
            }
1893
        }
1894
 
1895
      /* Versioning requires at least one misaligned data reference.  */
1896
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1897
        do_versioning = false;
1898
      else if (!do_versioning)
1899
        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1900
    }
1901
 
1902
  if (do_versioning)
1903
    {
1904
      VEC(gimple,heap) *may_misalign_stmts
1905
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1906
      gimple stmt;
1907
 
1908
      /* It can now be assumed that the data references in the statements
1909
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1910
         of the loop being vectorized.  */
1911
      FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1912
        {
1913
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1914
          dr = STMT_VINFO_DATA_REF (stmt_info);
1915
          SET_DR_MISALIGNMENT (dr, 0);
1916
          if (vect_print_dump_info (REPORT_ALIGNMENT))
1917
            fprintf (vect_dump, "Alignment of access forced using versioning.");
1918
        }
1919
 
1920
      if (vect_print_dump_info (REPORT_DETAILS))
1921
        fprintf (vect_dump, "Versioning for alignment will be applied.");
1922
 
1923
      /* Peeling and versioning can't be done together at this time.  */
1924
      gcc_assert (! (do_peeling && do_versioning));
1925
 
1926
      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1927
      gcc_assert (stat);
1928
      return stat;
1929
    }
1930
 
1931
  /* This point is reached if neither peeling nor versioning is being done.  */
1932
  gcc_assert (! (do_peeling || do_versioning));
1933
 
1934
  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1935
  return stat;
1936
}
1937
 
1938
 
1939
/* Function vect_find_same_alignment_drs.
1940
 
1941
   Update group and alignment relations according to the chosen
1942
   vectorization factor.  */
1943
 
1944
static void
1945
vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1946
                              loop_vec_info loop_vinfo)
1947
{
1948
  unsigned int i;
1949
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1950
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1951
  struct data_reference *dra = DDR_A (ddr);
1952
  struct data_reference *drb = DDR_B (ddr);
1953
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1954
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1955
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1956
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1957
  lambda_vector dist_v;
1958
  unsigned int loop_depth;
1959
 
1960
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1961
    return;
1962
 
1963
  if (dra == drb)
1964
    return;
1965
 
1966
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1967
    return;
1968
 
1969
  /* Loop-based vectorization and known data dependence.  */
1970
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
1971
    return;
1972
 
1973
  /* Data-dependence analysis reports a distance vector of zero
1974
     for data-references that overlap only in the first iteration
1975
     but have different sign step (see PR45764).
1976
     So as a sanity check require equal DR_STEP.  */
1977
  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1978
    return;
1979
 
1980
  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1981
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1982
    {
1983
      int dist = dist_v[loop_depth];
1984
 
1985
      if (vect_print_dump_info (REPORT_DR_DETAILS))
1986
        fprintf (vect_dump, "dependence distance  = %d.", dist);
1987
 
1988
      /* Same loop iteration.  */
1989
      if (dist == 0
1990
          || (dist % vectorization_factor == 0 && dra_size == drb_size))
1991
        {
1992
          /* Two references with distance zero have the same alignment.  */
1993
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1994
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1995
          if (vect_print_dump_info (REPORT_ALIGNMENT))
1996
            fprintf (vect_dump, "accesses have the same alignment.");
1997
          if (vect_print_dump_info (REPORT_DR_DETAILS))
1998
            {
1999
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
2000
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
2001
              fprintf (vect_dump, " and ");
2002
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
2003
            }
2004
        }
2005
    }
2006
}
2007
 
2008
 
2009
/* Function vect_analyze_data_refs_alignment
2010
 
2011
   Analyze the alignment of the data-references in the loop.
2012
   Return FALSE if a data reference is found that cannot be vectorized.  */
2013
 
2014
bool
2015
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2016
                                  bb_vec_info bb_vinfo)
2017
{
2018
  if (vect_print_dump_info (REPORT_DETAILS))
2019
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2020
 
2021
  /* Mark groups of data references with same alignment using
2022
     data dependence information.  */
2023
  if (loop_vinfo)
2024
    {
2025
      VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2026
      struct data_dependence_relation *ddr;
2027
      unsigned int i;
2028
 
2029
      FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2030
        vect_find_same_alignment_drs (ddr, loop_vinfo);
2031
    }
2032
 
2033
  if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2034
    {
2035
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2036
        fprintf (vect_dump,
2037
                 "not vectorized: can't calculate alignment for data ref.");
2038
      return false;
2039
    }
2040
 
2041
  return true;
2042
}
2043
 
2044
 
2045
/* Analyze groups of strided accesses: check that DR belongs to a group of
2046
   strided accesses of legal size, step, etc.  Detect gaps, single element
2047
   interleaving, and other special cases. Set strided access info.
2048
   Collect groups of strided stores for further use in SLP analysis.  */
2049
 
2050
static bool
2051
vect_analyze_group_access (struct data_reference *dr)
2052
{
2053
  tree step = DR_STEP (dr);
2054
  tree scalar_type = TREE_TYPE (DR_REF (dr));
2055
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2056
  gimple stmt = DR_STMT (dr);
2057
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2058
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2059
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2060
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2061
  HOST_WIDE_INT stride, last_accessed_element = 1;
2062
  bool slp_impossible = false;
2063
  struct loop *loop = NULL;
2064
 
2065
  if (loop_vinfo)
2066
    loop = LOOP_VINFO_LOOP (loop_vinfo);
2067
 
2068
  /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2069
     interleaving group (including gaps).  */
2070
  stride = dr_step / type_size;
2071
 
2072
  /* Not consecutive access is possible only if it is a part of interleaving.  */
2073
  if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2074
    {
2075
      /* Check if it this DR is a part of interleaving, and is a single
2076
         element of the group that is accessed in the loop.  */
2077
 
2078
      /* Gaps are supported only for loads. STEP must be a multiple of the type
2079
         size.  The size of the group must be a power of 2.  */
2080
      if (DR_IS_READ (dr)
2081
          && (dr_step % type_size) == 0
2082
          && stride > 0
2083
          && exact_log2 (stride) != -1)
2084
        {
2085
          GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2086
          GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2087
          if (vect_print_dump_info (REPORT_DR_DETAILS))
2088
            {
2089
              fprintf (vect_dump, "Detected single element interleaving ");
2090
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2091
              fprintf (vect_dump, " step ");
2092
              print_generic_expr (vect_dump, step, TDF_SLIM);
2093
            }
2094
 
2095
          if (loop_vinfo)
2096
            {
2097
              if (vect_print_dump_info (REPORT_DETAILS))
2098
                fprintf (vect_dump, "Data access with gaps requires scalar "
2099
                                    "epilogue loop");
2100
              if (loop->inner)
2101
                {
2102
                  if (vect_print_dump_info (REPORT_DETAILS))
2103
                    fprintf (vect_dump, "Peeling for outer loop is not"
2104
                                        " supported");
2105
                  return false;
2106
                }
2107
 
2108
              LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2109
            }
2110
 
2111
          return true;
2112
        }
2113
 
2114
      if (vect_print_dump_info (REPORT_DETAILS))
2115
        {
2116
          fprintf (vect_dump, "not consecutive access ");
2117
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2118
        }
2119
 
2120
      if (bb_vinfo)
2121
        {
2122
          /* Mark the statement as unvectorizable.  */
2123
          STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2124
          return true;
2125
        }
2126
 
2127
      return false;
2128
    }
2129
 
2130
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2131
    {
2132
      /* First stmt in the interleaving chain. Check the chain.  */
2133
      gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2134
      struct data_reference *data_ref = dr;
2135
      unsigned int count = 1;
2136
      tree next_step;
2137
      tree prev_init = DR_INIT (data_ref);
2138
      gimple prev = stmt;
2139
      HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2140
 
2141
      while (next)
2142
        {
2143
          /* Skip same data-refs.  In case that two or more stmts share
2144
             data-ref (supported only for loads), we vectorize only the first
2145
             stmt, and the rest get their vectorized loads from the first
2146
             one.  */
2147
          if (!tree_int_cst_compare (DR_INIT (data_ref),
2148
                                     DR_INIT (STMT_VINFO_DATA_REF (
2149
                                                   vinfo_for_stmt (next)))))
2150
            {
2151
              if (DR_IS_WRITE (data_ref))
2152
                {
2153
                  if (vect_print_dump_info (REPORT_DETAILS))
2154
                    fprintf (vect_dump, "Two store stmts share the same dr.");
2155
                  return false;
2156
                }
2157
 
2158
              /* Check that there is no load-store dependencies for this loads
2159
                 to prevent a case of load-store-load to the same location.  */
2160
              if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2161
                  || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2162
                {
2163
                  if (vect_print_dump_info (REPORT_DETAILS))
2164
                    fprintf (vect_dump,
2165
                             "READ_WRITE dependence in interleaving.");
2166
                  return false;
2167
                }
2168
 
2169
              /* For load use the same data-ref load.  */
2170
              GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2171
 
2172
              prev = next;
2173
              next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2174
              continue;
2175
            }
2176
 
2177
          prev = next;
2178
 
2179
          /* Check that all the accesses have the same STEP.  */
2180
          next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2181
          if (tree_int_cst_compare (step, next_step))
2182
            {
2183
              if (vect_print_dump_info (REPORT_DETAILS))
2184
                fprintf (vect_dump, "not consecutive access in interleaving");
2185
              return false;
2186
            }
2187
 
2188
          data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2189
          /* Check that the distance between two accesses is equal to the type
2190
             size. Otherwise, we have gaps.  */
2191
          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2192
                  - TREE_INT_CST_LOW (prev_init)) / type_size;
2193
          if (diff != 1)
2194
            {
2195
              /* FORNOW: SLP of accesses with gaps is not supported.  */
2196
              slp_impossible = true;
2197
              if (DR_IS_WRITE (data_ref))
2198
                {
2199
                  if (vect_print_dump_info (REPORT_DETAILS))
2200
                    fprintf (vect_dump, "interleaved store with gaps");
2201
                  return false;
2202
                }
2203
 
2204
              gaps += diff - 1;
2205
            }
2206
 
2207
          last_accessed_element += diff;
2208
 
2209
          /* Store the gap from the previous member of the group. If there is no
2210
             gap in the access, GROUP_GAP is always 1.  */
2211
          GROUP_GAP (vinfo_for_stmt (next)) = diff;
2212
 
2213
          prev_init = DR_INIT (data_ref);
2214
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2215
          /* Count the number of data-refs in the chain.  */
2216
          count++;
2217
        }
2218
 
2219
      /* COUNT is the number of accesses found, we multiply it by the size of
2220
         the type to get COUNT_IN_BYTES.  */
2221
      count_in_bytes = type_size * count;
2222
 
2223
      /* Check that the size of the interleaving (including gaps) is not
2224
         greater than STEP.  */
2225
      if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2226
        {
2227
          if (vect_print_dump_info (REPORT_DETAILS))
2228
            {
2229
              fprintf (vect_dump, "interleaving size is greater than step for ");
2230
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2231
            }
2232
          return false;
2233
        }
2234
 
2235
      /* Check that the size of the interleaving is equal to STEP for stores,
2236
         i.e., that there are no gaps.  */
2237
      if (dr_step && dr_step != count_in_bytes)
2238
        {
2239
          if (DR_IS_READ (dr))
2240
            {
2241
              slp_impossible = true;
2242
              /* There is a gap after the last load in the group. This gap is a
2243
                 difference between the stride and the number of elements. When
2244
                 there is no gap, this difference should be 0.  */
2245
              GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2246
            }
2247
          else
2248
            {
2249
              if (vect_print_dump_info (REPORT_DETAILS))
2250
                fprintf (vect_dump, "interleaved store with gaps");
2251
              return false;
2252
            }
2253
        }
2254
 
2255
      /* Check that STEP is a multiple of type size.  */
2256
      if (dr_step && (dr_step % type_size) != 0)
2257
        {
2258
          if (vect_print_dump_info (REPORT_DETAILS))
2259
            {
2260
              fprintf (vect_dump, "step is not a multiple of type size: step ");
2261
              print_generic_expr (vect_dump, step, TDF_SLIM);
2262
              fprintf (vect_dump, " size ");
2263
              print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2264
                                  TDF_SLIM);
2265
            }
2266
          return false;
2267
        }
2268
 
2269
      if (stride == 0)
2270
        stride = count;
2271
 
2272
      GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2273
      if (vect_print_dump_info (REPORT_DETAILS))
2274
        fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2275
 
2276
      /* SLP: create an SLP data structure for every interleaving group of
2277
         stores for further analysis in vect_analyse_slp.  */
2278
      if (DR_IS_WRITE (dr) && !slp_impossible)
2279
        {
2280
          if (loop_vinfo)
2281
            VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2282
                           stmt);
2283
          if (bb_vinfo)
2284
            VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2285
                           stmt);
2286
        }
2287
 
2288
      /* There is a gap in the end of the group.  */
2289
      if (stride - last_accessed_element > 0 && loop_vinfo)
2290
        {
2291
          if (vect_print_dump_info (REPORT_DETAILS))
2292
            fprintf (vect_dump, "Data access with gaps requires scalar "
2293
                                "epilogue loop");
2294
          if (loop->inner)
2295
            {
2296
              if (vect_print_dump_info (REPORT_DETAILS))
2297
                fprintf (vect_dump, "Peeling for outer loop is not supported");
2298
              return false;
2299
            }
2300
 
2301
          LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2302
        }
2303
    }
2304
 
2305
  return true;
2306
}
2307
 
2308
 
2309
/* Analyze the access pattern of the data-reference DR.
2310
   In case of non-consecutive accesses call vect_analyze_group_access() to
2311
   analyze groups of strided accesses.  */
2312
 
2313
static bool
2314
vect_analyze_data_ref_access (struct data_reference *dr)
2315
{
2316
  tree step = DR_STEP (dr);
2317
  tree scalar_type = TREE_TYPE (DR_REF (dr));
2318
  gimple stmt = DR_STMT (dr);
2319
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2320
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2321
  struct loop *loop = NULL;
2322
  HOST_WIDE_INT dr_step;
2323
 
2324
  if (loop_vinfo)
2325
    loop = LOOP_VINFO_LOOP (loop_vinfo);
2326
 
2327
  if (loop_vinfo && !step)
2328
    {
2329
      if (vect_print_dump_info (REPORT_DETAILS))
2330
        fprintf (vect_dump, "bad data-ref access in loop");
2331
      return false;
2332
    }
2333
 
2334
  /* Allow invariant loads in loops.  */
2335
  dr_step = TREE_INT_CST_LOW (step);
2336
  if (loop_vinfo && dr_step == 0)
2337
    {
2338
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2339
      return DR_IS_READ (dr);
2340
    }
2341
 
2342
  if (loop && nested_in_vect_loop_p (loop, stmt))
2343
    {
2344
      /* Interleaved accesses are not yet supported within outer-loop
2345
        vectorization for references in the inner-loop.  */
2346
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2347
 
2348
      /* For the rest of the analysis we use the outer-loop step.  */
2349
      step = STMT_VINFO_DR_STEP (stmt_info);
2350
      dr_step = TREE_INT_CST_LOW (step);
2351
 
2352
      if (dr_step == 0)
2353
        {
2354
          if (vect_print_dump_info (REPORT_ALIGNMENT))
2355
            fprintf (vect_dump, "zero step in outer loop.");
2356
          if (DR_IS_READ (dr))
2357
            return true;
2358
          else
2359
            return false;
2360
        }
2361
    }
2362
 
2363
  /* Consecutive?  */
2364
  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2365
      || (dr_step < 0
2366
          && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2367
    {
2368
      /* Mark that it is not interleaving.  */
2369
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2370
      return true;
2371
    }
2372
 
2373
  if (loop && nested_in_vect_loop_p (loop, stmt))
2374
    {
2375
      if (vect_print_dump_info (REPORT_ALIGNMENT))
2376
        fprintf (vect_dump, "strided access in outer loop.");
2377
      return false;
2378
    }
2379
 
2380
  /* Not consecutive access - check if it's a part of interleaving group.  */
2381
  return vect_analyze_group_access (dr);
2382
}
2383
 
2384
 
2385
/* Function vect_analyze_data_ref_accesses.
2386
 
2387
   Analyze the access pattern of all the data references in the loop.
2388
 
2389
   FORNOW: the only access pattern that is considered vectorizable is a
2390
           simple step 1 (consecutive) access.
2391
 
2392
   FORNOW: handle only arrays and pointer accesses.  */
2393
 
2394
bool
2395
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2396
{
2397
  unsigned int i;
2398
  VEC (data_reference_p, heap) *datarefs;
2399
  struct data_reference *dr;
2400
 
2401
  if (vect_print_dump_info (REPORT_DETAILS))
2402
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2403
 
2404
  if (loop_vinfo)
2405
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2406
  else
2407
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2408
 
2409
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2410
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2411
        && !vect_analyze_data_ref_access (dr))
2412
      {
2413
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2414
          fprintf (vect_dump, "not vectorized: complicated access pattern.");
2415
 
2416
        if (bb_vinfo)
2417
          {
2418
            /* Mark the statement as not vectorizable.  */
2419
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2420
            continue;
2421
          }
2422
        else
2423
          return false;
2424
      }
2425
 
2426
  return true;
2427
}
2428
 
2429
/* Function vect_prune_runtime_alias_test_list.
2430
 
2431
   Prune a list of ddrs to be tested at run-time by versioning for alias.
2432
   Return FALSE if resulting list of ddrs is longer then allowed by
2433
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
2434
 
2435
bool
2436
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2437
{
2438
  VEC (ddr_p, heap) * ddrs =
2439
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2440
  unsigned i, j;
2441
 
2442
  if (vect_print_dump_info (REPORT_DETAILS))
2443
    fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2444
 
2445
  for (i = 0; i < VEC_length (ddr_p, ddrs); )
2446
    {
2447
      bool found;
2448
      ddr_p ddr_i;
2449
 
2450
      ddr_i = VEC_index (ddr_p, ddrs, i);
2451
      found = false;
2452
 
2453
      for (j = 0; j < i; j++)
2454
        {
2455
          ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2456
 
2457
          if (vect_vfa_range_equal (ddr_i, ddr_j))
2458
            {
2459
              if (vect_print_dump_info (REPORT_DR_DETAILS))
2460
                {
2461
                  fprintf (vect_dump, "found equal ranges ");
2462
                  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2463
                  fprintf (vect_dump, ", ");
2464
                  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2465
                  fprintf (vect_dump, " and ");
2466
                  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2467
                  fprintf (vect_dump, ", ");
2468
                  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2469
                }
2470
              found = true;
2471
              break;
2472
            }
2473
        }
2474
 
2475
      if (found)
2476
      {
2477
        VEC_ordered_remove (ddr_p, ddrs, i);
2478
        continue;
2479
      }
2480
      i++;
2481
    }
2482
 
2483
  if (VEC_length (ddr_p, ddrs) >
2484
       (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2485
    {
2486
      if (vect_print_dump_info (REPORT_DR_DETAILS))
2487
        {
2488
          fprintf (vect_dump,
2489
                   "disable versioning for alias - max number of generated "
2490
                   "checks exceeded.");
2491
        }
2492
 
2493
      VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2494
 
2495
      return false;
2496
    }
2497
 
2498
  return true;
2499
}
2500
 
2501
/* Check whether a non-affine read in stmt is suitable for gather load
2502
   and if so, return a builtin decl for that operation.  */
2503
 
2504
tree
2505
vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2506
                   tree *offp, int *scalep)
2507
{
2508
  HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2509
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2510
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2511
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2512
  tree offtype = NULL_TREE;
2513
  tree decl, base, off;
2514
  enum machine_mode pmode;
2515
  int punsignedp, pvolatilep;
2516
 
2517
  /* The gather builtins need address of the form
2518
     loop_invariant + vector * {1, 2, 4, 8}
2519
     or
2520
     loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2521
     Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2522
     of loop invariants/SSA_NAMEs defined in the loop, with casts,
2523
     multiplications and additions in it.  To get a vector, we need
2524
     a single SSA_NAME that will be defined in the loop and will
2525
     contain everything that is not loop invariant and that can be
2526
     vectorized.  The following code attempts to find such a preexistng
2527
     SSA_NAME OFF and put the loop invariants into a tree BASE
2528
     that can be gimplified before the loop.  */
2529
  base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2530
                              &pmode, &punsignedp, &pvolatilep, false);
2531
  gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2532
 
2533
  if (TREE_CODE (base) == MEM_REF)
2534
    {
2535
      if (!integer_zerop (TREE_OPERAND (base, 1)))
2536
        {
2537
          if (off == NULL_TREE)
2538
            {
2539
              double_int moff = mem_ref_offset (base);
2540
              off = double_int_to_tree (sizetype, moff);
2541
            }
2542
          else
2543
            off = size_binop (PLUS_EXPR, off,
2544
                              fold_convert (sizetype, TREE_OPERAND (base, 1)));
2545
        }
2546
      base = TREE_OPERAND (base, 0);
2547
    }
2548
  else
2549
    base = build_fold_addr_expr (base);
2550
 
2551
  if (off == NULL_TREE)
2552
    off = size_zero_node;
2553
 
2554
  /* If base is not loop invariant, either off is 0, then we start with just
2555
     the constant offset in the loop invariant BASE and continue with base
2556
     as OFF, otherwise give up.
2557
     We could handle that case by gimplifying the addition of base + off
2558
     into some SSA_NAME and use that as off, but for now punt.  */
2559
  if (!expr_invariant_in_loop_p (loop, base))
2560
    {
2561
      if (!integer_zerop (off))
2562
        return NULL_TREE;
2563
      off = base;
2564
      base = size_int (pbitpos / BITS_PER_UNIT);
2565
    }
2566
  /* Otherwise put base + constant offset into the loop invariant BASE
2567
     and continue with OFF.  */
2568
  else
2569
    {
2570
      base = fold_convert (sizetype, base);
2571
      base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2572
    }
2573
 
2574
  /* OFF at this point may be either a SSA_NAME or some tree expression
2575
     from get_inner_reference.  Try to peel off loop invariants from it
2576
     into BASE as long as possible.  */
2577
  STRIP_NOPS (off);
2578
  while (offtype == NULL_TREE)
2579
    {
2580
      enum tree_code code;
2581
      tree op0, op1, add = NULL_TREE;
2582
 
2583
      if (TREE_CODE (off) == SSA_NAME)
2584
        {
2585
          gimple def_stmt = SSA_NAME_DEF_STMT (off);
2586
 
2587
          if (expr_invariant_in_loop_p (loop, off))
2588
            return NULL_TREE;
2589
 
2590
          if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2591
            break;
2592
 
2593
          op0 = gimple_assign_rhs1 (def_stmt);
2594
          code = gimple_assign_rhs_code (def_stmt);
2595
          op1 = gimple_assign_rhs2 (def_stmt);
2596
        }
2597
      else
2598
        {
2599
          if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2600
            return NULL_TREE;
2601
          code = TREE_CODE (off);
2602
          extract_ops_from_tree (off, &code, &op0, &op1);
2603
        }
2604
      switch (code)
2605
        {
2606
        case POINTER_PLUS_EXPR:
2607
        case PLUS_EXPR:
2608
          if (expr_invariant_in_loop_p (loop, op0))
2609
            {
2610
              add = op0;
2611
              off = op1;
2612
            do_add:
2613
              add = fold_convert (sizetype, add);
2614
              if (scale != 1)
2615
                add = size_binop (MULT_EXPR, add, size_int (scale));
2616
              base = size_binop (PLUS_EXPR, base, add);
2617
              continue;
2618
            }
2619
          if (expr_invariant_in_loop_p (loop, op1))
2620
            {
2621
              add = op1;
2622
              off = op0;
2623
              goto do_add;
2624
            }
2625
          break;
2626
        case MINUS_EXPR:
2627
          if (expr_invariant_in_loop_p (loop, op1))
2628
            {
2629
              add = fold_convert (sizetype, op1);
2630
              add = size_binop (MINUS_EXPR, size_zero_node, add);
2631
              off = op0;
2632
              goto do_add;
2633
            }
2634
          break;
2635
        case MULT_EXPR:
2636
          if (scale == 1 && host_integerp (op1, 0))
2637
            {
2638
              scale = tree_low_cst (op1, 0);
2639
              off = op0;
2640
              continue;
2641
            }
2642
          break;
2643
        case SSA_NAME:
2644
          off = op0;
2645
          continue;
2646
        CASE_CONVERT:
2647
          if (!POINTER_TYPE_P (TREE_TYPE (op0))
2648
              && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2649
            break;
2650
          if (TYPE_PRECISION (TREE_TYPE (op0))
2651
              == TYPE_PRECISION (TREE_TYPE (off)))
2652
            {
2653
              off = op0;
2654
              continue;
2655
            }
2656
          if (TYPE_PRECISION (TREE_TYPE (op0))
2657
              < TYPE_PRECISION (TREE_TYPE (off)))
2658
            {
2659
              off = op0;
2660
              offtype = TREE_TYPE (off);
2661
              STRIP_NOPS (off);
2662
              continue;
2663
            }
2664
          break;
2665
        default:
2666
          break;
2667
        }
2668
      break;
2669
    }
2670
 
2671
  /* If at the end OFF still isn't a SSA_NAME or isn't
2672
     defined in the loop, punt.  */
2673
  if (TREE_CODE (off) != SSA_NAME
2674
      || expr_invariant_in_loop_p (loop, off))
2675
    return NULL_TREE;
2676
 
2677
  if (offtype == NULL_TREE)
2678
    offtype = TREE_TYPE (off);
2679
 
2680
  decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2681
                                           offtype, scale);
2682
  if (decl == NULL_TREE)
2683
    return NULL_TREE;
2684
 
2685
  if (basep)
2686
    *basep = base;
2687
  if (offp)
2688
    *offp = off;
2689
  if (scalep)
2690
    *scalep = scale;
2691
  return decl;
2692
}
2693
 
2694
 
2695
/* Function vect_analyze_data_refs.
2696
 
2697
  Find all the data references in the loop or basic block.
2698
 
2699
   The general structure of the analysis of data refs in the vectorizer is as
2700
   follows:
2701
   1- vect_analyze_data_refs(loop/bb): call
2702
      compute_data_dependences_for_loop/bb to find and analyze all data-refs
2703
      in the loop/bb and their dependences.
2704
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
2705
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2706
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2707
 
2708
*/
2709
 
2710
bool
2711
vect_analyze_data_refs (loop_vec_info loop_vinfo,
2712
                        bb_vec_info bb_vinfo,
2713
                        int *min_vf)
2714
{
2715
  struct loop *loop = NULL;
2716
  basic_block bb = NULL;
2717
  unsigned int i;
2718
  VEC (data_reference_p, heap) *datarefs;
2719
  struct data_reference *dr;
2720
  tree scalar_type;
2721
  bool res, stop_bb_analysis = false;
2722
 
2723
  if (vect_print_dump_info (REPORT_DETAILS))
2724
    fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2725
 
2726
  if (loop_vinfo)
2727
    {
2728
      loop = LOOP_VINFO_LOOP (loop_vinfo);
2729
      res = compute_data_dependences_for_loop
2730
        (loop, true,
2731
         &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2732
         &LOOP_VINFO_DATAREFS (loop_vinfo),
2733
         &LOOP_VINFO_DDRS (loop_vinfo));
2734
 
2735
      if (!res)
2736
        {
2737
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2738
            fprintf (vect_dump, "not vectorized: loop contains function calls"
2739
                     " or data references that cannot be analyzed");
2740
          return false;
2741
        }
2742
 
2743
      datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2744
    }
2745
  else
2746
    {
2747
      bb = BB_VINFO_BB (bb_vinfo);
2748
      res = compute_data_dependences_for_bb (bb, true,
2749
                                             &BB_VINFO_DATAREFS (bb_vinfo),
2750
                                             &BB_VINFO_DDRS (bb_vinfo));
2751
      if (!res)
2752
        {
2753
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2754
            fprintf (vect_dump, "not vectorized: basic block contains function"
2755
                     " calls or data references that cannot be analyzed");
2756
          return false;
2757
        }
2758
 
2759
      datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2760
    }
2761
 
2762
  /* Go through the data-refs, check that the analysis succeeded.  Update
2763
     pointer from stmt_vec_info struct to DR and vectype.  */
2764
 
2765
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2766
    {
2767
      gimple stmt;
2768
      stmt_vec_info stmt_info;
2769
      tree base, offset, init;
2770
      bool gather = false;
2771
      int vf;
2772
 
2773
      if (!dr || !DR_REF (dr))
2774
        {
2775
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2776
            fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2777
 
2778
          return false;
2779
        }
2780
 
2781
      stmt = DR_STMT (dr);
2782
      stmt_info = vinfo_for_stmt (stmt);
2783
 
2784
      if (stop_bb_analysis)
2785
        {
2786
          STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2787
          continue;
2788
        }
2789
 
2790
      /* Check that analysis of the data-ref succeeded.  */
2791
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2792
          || !DR_STEP (dr))
2793
        {
2794
          /* If target supports vector gather loads, see if they can't
2795
             be used.  */
2796
          if (loop_vinfo
2797
              && DR_IS_READ (dr)
2798
              && !TREE_THIS_VOLATILE (DR_REF (dr))
2799
              && targetm.vectorize.builtin_gather != NULL
2800
              && !nested_in_vect_loop_p (loop, stmt))
2801
            {
2802
              struct data_reference *newdr
2803
                = create_data_ref (NULL, loop_containing_stmt (stmt),
2804
                                   DR_REF (dr), stmt, true);
2805
              gcc_assert (newdr != NULL && DR_REF (newdr));
2806
              if (DR_BASE_ADDRESS (newdr)
2807
                  && DR_OFFSET (newdr)
2808
                  && DR_INIT (newdr)
2809
                  && DR_STEP (newdr)
2810
                  && integer_zerop (DR_STEP (newdr)))
2811
                {
2812
                  dr = newdr;
2813
                  gather = true;
2814
                }
2815
              else
2816
                free_data_ref (newdr);
2817
            }
2818
 
2819
          if (!gather)
2820
            {
2821
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2822
                {
2823
                  fprintf (vect_dump, "not vectorized: data ref analysis "
2824
                                      "failed ");
2825
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2826
                }
2827
 
2828
              if (bb_vinfo)
2829
                {
2830
                  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2831
                  stop_bb_analysis = true;
2832
                  continue;
2833
                }
2834
 
2835
              return false;
2836
            }
2837
        }
2838
 
2839
      if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2840
        {
2841
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2842
            fprintf (vect_dump, "not vectorized: base addr of dr is a "
2843
                     "constant");
2844
 
2845
          if (bb_vinfo)
2846
            {
2847
              STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2848
              stop_bb_analysis = true;
2849
              continue;
2850
            }
2851
 
2852
          if (gather)
2853
            free_data_ref (dr);
2854
          return false;
2855
        }
2856
 
2857
      if (TREE_THIS_VOLATILE (DR_REF (dr)))
2858
        {
2859
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2860
            {
2861
              fprintf (vect_dump, "not vectorized: volatile type ");
2862
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2863
            }
2864
 
2865
          if (bb_vinfo)
2866
            {
2867
              STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2868
              stop_bb_analysis = true;
2869
              continue;
2870
            }
2871
 
2872
          return false;
2873
        }
2874
 
2875
      base = unshare_expr (DR_BASE_ADDRESS (dr));
2876
      offset = unshare_expr (DR_OFFSET (dr));
2877
      init = unshare_expr (DR_INIT (dr));
2878
 
2879
      if (stmt_can_throw_internal (stmt))
2880
        {
2881
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2882
            {
2883
              fprintf (vect_dump, "not vectorized: statement can throw an "
2884
                       "exception ");
2885
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2886
            }
2887
 
2888
          if (bb_vinfo)
2889
            {
2890
              STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2891
              stop_bb_analysis = true;
2892
              continue;
2893
            }
2894
 
2895
          if (gather)
2896
            free_data_ref (dr);
2897
          return false;
2898
        }
2899
 
2900
      if (is_gimple_call (stmt))
2901
        {
2902
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2903
            {
2904
              fprintf (vect_dump, "not vectorized: dr in a call ");
2905
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2906
            }
2907
 
2908
          if (bb_vinfo)
2909
            {
2910
              STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2911
              stop_bb_analysis = true;
2912
              continue;
2913
            }
2914
 
2915
          if (gather)
2916
            free_data_ref (dr);
2917
          return false;
2918
        }
2919
 
2920
      /* Update DR field in stmt_vec_info struct.  */
2921
 
2922
      /* If the dataref is in an inner-loop of the loop that is considered for
2923
         for vectorization, we also want to analyze the access relative to
2924
         the outer-loop (DR contains information only relative to the
2925
         inner-most enclosing loop).  We do that by building a reference to the
2926
         first location accessed by the inner-loop, and analyze it relative to
2927
         the outer-loop.  */
2928
      if (loop && nested_in_vect_loop_p (loop, stmt))
2929
        {
2930
          tree outer_step, outer_base, outer_init;
2931
          HOST_WIDE_INT pbitsize, pbitpos;
2932
          tree poffset;
2933
          enum machine_mode pmode;
2934
          int punsignedp, pvolatilep;
2935
          affine_iv base_iv, offset_iv;
2936
          tree dinit;
2937
 
2938
          /* Build a reference to the first location accessed by the
2939
             inner-loop: *(BASE+INIT).  (The first location is actually
2940
             BASE+INIT+OFFSET, but we add OFFSET separately later).  */
2941
          tree inner_base = build_fold_indirect_ref
2942
                                (fold_build_pointer_plus (base, init));
2943
 
2944
          if (vect_print_dump_info (REPORT_DETAILS))
2945
            {
2946
              fprintf (vect_dump, "analyze in outer-loop: ");
2947
              print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2948
            }
2949
 
2950
          outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2951
                          &poffset, &pmode, &punsignedp, &pvolatilep, false);
2952
          gcc_assert (outer_base != NULL_TREE);
2953
 
2954
          if (pbitpos % BITS_PER_UNIT != 0)
2955
            {
2956
              if (vect_print_dump_info (REPORT_DETAILS))
2957
                fprintf (vect_dump, "failed: bit offset alignment.\n");
2958
              return false;
2959
            }
2960
 
2961
          outer_base = build_fold_addr_expr (outer_base);
2962
          if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2963
                          &base_iv, false))
2964
            {
2965
              if (vect_print_dump_info (REPORT_DETAILS))
2966
                fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2967
              return false;
2968
            }
2969
 
2970
          if (offset)
2971
            {
2972
              if (poffset)
2973
                poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2974
                                       poffset);
2975
              else
2976
                poffset = offset;
2977
            }
2978
 
2979
          if (!poffset)
2980
            {
2981
              offset_iv.base = ssize_int (0);
2982
              offset_iv.step = ssize_int (0);
2983
            }
2984
          else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2985
                               &offset_iv, false))
2986
            {
2987
              if (vect_print_dump_info (REPORT_DETAILS))
2988
                fprintf (vect_dump, "evolution of offset is not affine.\n");
2989
              return false;
2990
            }
2991
 
2992
          outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2993
          split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2994
          outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
2995
          split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2996
          outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
2997
 
2998
          outer_step = size_binop (PLUS_EXPR,
2999
                                fold_convert (ssizetype, base_iv.step),
3000
                                fold_convert (ssizetype, offset_iv.step));
3001
 
3002
          STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3003
          /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3004
          STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3005
          STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3006
          STMT_VINFO_DR_OFFSET (stmt_info) =
3007
                                fold_convert (ssizetype, offset_iv.base);
3008
          STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3009
                                size_int (highest_pow2_factor (offset_iv.base));
3010
 
3011
          if (vect_print_dump_info (REPORT_DETAILS))
3012
            {
3013
              fprintf (vect_dump, "\touter base_address: ");
3014
              print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3015
              fprintf (vect_dump, "\n\touter offset from base address: ");
3016
              print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3017
              fprintf (vect_dump, "\n\touter constant offset from base address: ");
3018
              print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3019
              fprintf (vect_dump, "\n\touter step: ");
3020
              print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3021
              fprintf (vect_dump, "\n\touter aligned to: ");
3022
              print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3023
            }
3024
        }
3025
 
3026
      if (STMT_VINFO_DATA_REF (stmt_info))
3027
        {
3028
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3029
            {
3030
              fprintf (vect_dump,
3031
                       "not vectorized: more than one data ref in stmt: ");
3032
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3033
            }
3034
 
3035
          if (bb_vinfo)
3036
            {
3037
              STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3038
              stop_bb_analysis = true;
3039
              continue;
3040
            }
3041
 
3042
          if (gather)
3043
            free_data_ref (dr);
3044
          return false;
3045
        }
3046
 
3047
      STMT_VINFO_DATA_REF (stmt_info) = dr;
3048
 
3049
      /* Set vectype for STMT.  */
3050
      scalar_type = TREE_TYPE (DR_REF (dr));
3051
      STMT_VINFO_VECTYPE (stmt_info) =
3052
                get_vectype_for_scalar_type (scalar_type);
3053
      if (!STMT_VINFO_VECTYPE (stmt_info))
3054
        {
3055
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3056
            {
3057
              fprintf (vect_dump,
3058
                       "not vectorized: no vectype for stmt: ");
3059
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3060
              fprintf (vect_dump, " scalar_type: ");
3061
              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3062
            }
3063
 
3064
          if (bb_vinfo)
3065
            {
3066
              /* Mark the statement as not vectorizable.  */
3067
              STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3068
              stop_bb_analysis = true;
3069
              continue;
3070
            }
3071
 
3072
          if (gather)
3073
            {
3074
              STMT_VINFO_DATA_REF (stmt_info) = NULL;
3075
              free_data_ref (dr);
3076
            }
3077
          return false;
3078
        }
3079
 
3080
      /* Adjust the minimal vectorization factor according to the
3081
         vector type.  */
3082
      vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3083
      if (vf > *min_vf)
3084
        *min_vf = vf;
3085
 
3086
      if (gather)
3087
        {
3088
          unsigned int j, k, n;
3089
          struct data_reference *olddr
3090
            = VEC_index (data_reference_p, datarefs, i);
3091
          VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3092
          struct data_dependence_relation *ddr, *newddr;
3093
          bool bad = false;
3094
          tree off;
3095
          VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3096
 
3097
          if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL)
3098
              || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3099
            {
3100
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3101
                {
3102
                  fprintf (vect_dump,
3103
                           "not vectorized: not suitable for gather ");
3104
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3105
                }
3106
              return false;
3107
            }
3108
 
3109
          n = VEC_length (data_reference_p, datarefs) - 1;
3110
          for (j = 0, k = i - 1; j < i; j++)
3111
            {
3112
              ddr = VEC_index (ddr_p, ddrs, k);
3113
              gcc_assert (DDR_B (ddr) == olddr);
3114
              newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3115
                                                            nest);
3116
              VEC_replace (ddr_p, ddrs, k, newddr);
3117
              free_dependence_relation (ddr);
3118
              if (!bad
3119
                  && DR_IS_WRITE (DDR_A (newddr))
3120
                  && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3121
                bad = true;
3122
              k += --n;
3123
            }
3124
 
3125
          k++;
3126
          n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3127
          for (; k < n; k++)
3128
            {
3129
              ddr = VEC_index (ddr_p, ddrs, k);
3130
              gcc_assert (DDR_A (ddr) == olddr);
3131
              newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3132
                                                            nest);
3133
              VEC_replace (ddr_p, ddrs, k, newddr);
3134
              free_dependence_relation (ddr);
3135
              if (!bad
3136
                  && DR_IS_WRITE (DDR_B (newddr))
3137
                  && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3138
                bad = true;
3139
            }
3140
 
3141
          k = VEC_length (ddr_p, ddrs)
3142
              - VEC_length (data_reference_p, datarefs) + i;
3143
          ddr = VEC_index (ddr_p, ddrs, k);
3144
          gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3145
          newddr = initialize_data_dependence_relation (dr, dr, nest);
3146
          VEC_replace (ddr_p, ddrs, k, newddr);
3147
          free_dependence_relation (ddr);
3148
          VEC_replace (data_reference_p, datarefs, i, dr);
3149
 
3150
          if (bad)
3151
            {
3152
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3153
                {
3154
                  fprintf (vect_dump,
3155
                           "not vectorized: data dependence conflict"
3156
                           " prevents gather");
3157
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3158
                }
3159
              return false;
3160
            }
3161
 
3162
          STMT_VINFO_GATHER_P (stmt_info) = true;
3163
        }
3164
    }
3165
 
3166
  return true;
3167
}
3168
 
3169
 
3170
/* Function vect_get_new_vect_var.
3171
 
3172
   Returns a name for a new variable.  The current naming scheme appends the
3173
   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
3174
   the name of vectorizer generated variables, and appends that to NAME if
3175
   provided.  */
3176
 
3177
tree
3178
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3179
{
3180
  const char *prefix;
3181
  tree new_vect_var;
3182
 
3183
  switch (var_kind)
3184
  {
3185
  case vect_simple_var:
3186
    prefix = "vect_";
3187
    break;
3188
  case vect_scalar_var:
3189
    prefix = "stmp_";
3190
    break;
3191
  case vect_pointer_var:
3192
    prefix = "vect_p";
3193
    break;
3194
  default:
3195
    gcc_unreachable ();
3196
  }
3197
 
3198
  if (name)
3199
    {
3200
      char* tmp = concat (prefix, name, NULL);
3201
      new_vect_var = create_tmp_var (type, tmp);
3202
      free (tmp);
3203
    }
3204
  else
3205
    new_vect_var = create_tmp_var (type, prefix);
3206
 
3207
  /* Mark vector typed variable as a gimple register variable.  */
3208
  if (TREE_CODE (type) == VECTOR_TYPE)
3209
    DECL_GIMPLE_REG_P (new_vect_var) = true;
3210
 
3211
  return new_vect_var;
3212
}
3213
 
3214
 
3215
/* Function vect_create_addr_base_for_vector_ref.
3216
 
3217
   Create an expression that computes the address of the first memory location
3218
   that will be accessed for a data reference.
3219
 
3220
   Input:
3221
   STMT: The statement containing the data reference.
3222
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3223
   OFFSET: Optional. If supplied, it is be added to the initial address.
3224
   LOOP:    Specify relative to which loop-nest should the address be computed.
3225
            For example, when the dataref is in an inner-loop nested in an
3226
            outer-loop that is now being vectorized, LOOP can be either the
3227
            outer-loop, or the inner-loop.  The first memory location accessed
3228
            by the following dataref ('in' points to short):
3229
 
3230
                for (i=0; i<N; i++)
3231
                   for (j=0; j<M; j++)
3232
                     s += in[i+j]
3233
 
3234
            is as follows:
3235
            if LOOP=i_loop:     &in             (relative to i_loop)
3236
            if LOOP=j_loop:     &in+i*2B        (relative to j_loop)
3237
 
3238
   Output:
3239
   1. Return an SSA_NAME whose value is the address of the memory location of
3240
      the first vector of the data reference.
3241
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3242
      these statement(s) which define the returned SSA_NAME.
3243
 
3244
   FORNOW: We are only handling array accesses with step 1.  */
3245
 
3246
tree
3247
vect_create_addr_base_for_vector_ref (gimple stmt,
3248
                                      gimple_seq *new_stmt_list,
3249
                                      tree offset,
3250
                                      struct loop *loop)
3251
{
3252
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3253
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3254
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3255
  tree base_name;
3256
  tree data_ref_base_var;
3257
  tree vec_stmt;
3258
  tree addr_base, addr_expr;
3259
  tree dest;
3260
  gimple_seq seq = NULL;
3261
  tree base_offset = unshare_expr (DR_OFFSET (dr));
3262
  tree init = unshare_expr (DR_INIT (dr));
3263
  tree vect_ptr_type;
3264
  tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3265
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3266
  tree base;
3267
 
3268
  if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3269
    {
3270
      struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3271
 
3272
      gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3273
 
3274
      data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3275
      base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3276
      init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3277
    }
3278
 
3279
  if (loop_vinfo)
3280
    base_name = build_fold_indirect_ref (data_ref_base);
3281
  else
3282
    {
3283
      base_offset = ssize_int (0);
3284
      init = ssize_int (0);
3285
      base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3286
    }
3287
 
3288
  data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3289
  add_referenced_var (data_ref_base_var);
3290
  data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3291
                                        data_ref_base_var);
3292
  gimple_seq_add_seq (new_stmt_list, seq);
3293
 
3294
  /* Create base_offset */
3295
  base_offset = size_binop (PLUS_EXPR,
3296
                            fold_convert (sizetype, base_offset),
3297
                            fold_convert (sizetype, init));
3298
  dest = create_tmp_var (sizetype, "base_off");
3299
  add_referenced_var (dest);
3300
  base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3301
  gimple_seq_add_seq (new_stmt_list, seq);
3302
 
3303
  if (offset)
3304
    {
3305
      tree tmp = create_tmp_var (sizetype, "offset");
3306
 
3307
      add_referenced_var (tmp);
3308
      offset = fold_build2 (MULT_EXPR, sizetype,
3309
                            fold_convert (sizetype, offset), step);
3310
      base_offset = fold_build2 (PLUS_EXPR, sizetype,
3311
                                 base_offset, offset);
3312
      base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3313
      gimple_seq_add_seq (new_stmt_list, seq);
3314
    }
3315
 
3316
  /* base + base_offset */
3317
  if (loop_vinfo)
3318
    addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3319
  else
3320
    {
3321
      addr_base = build1 (ADDR_EXPR,
3322
                          build_pointer_type (TREE_TYPE (DR_REF (dr))),
3323
                          unshare_expr (DR_REF (dr)));
3324
    }
3325
 
3326
  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3327
  base = get_base_address (DR_REF (dr));
3328
  if (base
3329
      && TREE_CODE (base) == MEM_REF)
3330
    vect_ptr_type
3331
      = build_qualified_type (vect_ptr_type,
3332
                              TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3333
 
3334
  vec_stmt = fold_convert (vect_ptr_type, addr_base);
3335
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3336
                                     get_name (base_name));
3337
  add_referenced_var (addr_expr);
3338
  vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3339
  gimple_seq_add_seq (new_stmt_list, seq);
3340
 
3341
  if (DR_PTR_INFO (dr)
3342
      && TREE_CODE (vec_stmt) == SSA_NAME)
3343
    {
3344
      duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3345
      if (offset)
3346
        {
3347
          SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
3348
          SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
3349
        }
3350
    }
3351
 
3352
  if (vect_print_dump_info (REPORT_DETAILS))
3353
    {
3354
      fprintf (vect_dump, "created ");
3355
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
3356
    }
3357
 
3358
  return vec_stmt;
3359
}
3360
 
3361
 
3362
/* Function vect_create_data_ref_ptr.
3363
 
3364
   Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3365
   location accessed in the loop by STMT, along with the def-use update
3366
   chain to appropriately advance the pointer through the loop iterations.
3367
   Also set aliasing information for the pointer.  This pointer is used by
3368
   the callers to this function to create a memory reference expression for
3369
   vector load/store access.
3370
 
3371
   Input:
3372
   1. STMT: a stmt that references memory. Expected to be of the form
3373
         GIMPLE_ASSIGN <name, data-ref> or
3374
         GIMPLE_ASSIGN <data-ref, name>.
3375
   2. AGGR_TYPE: the type of the reference, which should be either a vector
3376
        or an array.
3377
   3. AT_LOOP: the loop where the vector memref is to be created.
3378
   4. OFFSET (optional): an offset to be added to the initial address accessed
3379
        by the data-ref in STMT.
3380
   5. BSI: location where the new stmts are to be placed if there is no loop
3381
   6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3382
        pointing to the initial address.
3383
 
3384
   Output:
3385
   1. Declare a new ptr to vector_type, and have it point to the base of the
3386
      data reference (initial addressed accessed by the data reference).
3387
      For example, for vector of type V8HI, the following code is generated:
3388
 
3389
      v8hi *ap;
3390
      ap = (v8hi *)initial_address;
3391
 
3392
      if OFFSET is not supplied:
3393
         initial_address = &a[init];
3394
      if OFFSET is supplied:
3395
         initial_address = &a[init + OFFSET];
3396
 
3397
      Return the initial_address in INITIAL_ADDRESS.
3398
 
3399
   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
3400
      update the pointer in each iteration of the loop.
3401
 
3402
      Return the increment stmt that updates the pointer in PTR_INCR.
3403
 
3404
   3. Set INV_P to true if the access pattern of the data reference in the
3405
      vectorized loop is invariant.  Set it to false otherwise.
3406
 
3407
   4. Return the pointer.  */
3408
 
3409
tree
3410
vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3411
                          tree offset, tree *initial_address,
3412
                          gimple_stmt_iterator *gsi, gimple *ptr_incr,
3413
                          bool only_init, bool *inv_p)
3414
{
3415
  tree base_name;
3416
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3417
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3418
  struct loop *loop = NULL;
3419
  bool nested_in_vect_loop = false;
3420
  struct loop *containing_loop = NULL;
3421
  tree aggr_ptr_type;
3422
  tree aggr_ptr;
3423
  tree new_temp;
3424
  gimple vec_stmt;
3425
  gimple_seq new_stmt_list = NULL;
3426
  edge pe = NULL;
3427
  basic_block new_bb;
3428
  tree aggr_ptr_init;
3429
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3430
  tree aptr;
3431
  gimple_stmt_iterator incr_gsi;
3432
  bool insert_after;
3433
  bool negative;
3434
  tree indx_before_incr, indx_after_incr;
3435
  gimple incr;
3436
  tree step;
3437
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3438
  tree base;
3439
 
3440
  gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3441
              || TREE_CODE (aggr_type) == VECTOR_TYPE);
3442
 
3443
  if (loop_vinfo)
3444
    {
3445
      loop = LOOP_VINFO_LOOP (loop_vinfo);
3446
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3447
      containing_loop = (gimple_bb (stmt))->loop_father;
3448
      pe = loop_preheader_edge (loop);
3449
    }
3450
  else
3451
    {
3452
      gcc_assert (bb_vinfo);
3453
      only_init = true;
3454
      *ptr_incr = NULL;
3455
    }
3456
 
3457
  /* Check the step (evolution) of the load in LOOP, and record
3458
     whether it's invariant.  */
3459
  if (nested_in_vect_loop)
3460
    step = STMT_VINFO_DR_STEP (stmt_info);
3461
  else
3462
    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3463
 
3464
  if (tree_int_cst_compare (step, size_zero_node) == 0)
3465
    *inv_p = true;
3466
  else
3467
    *inv_p = false;
3468
  negative = tree_int_cst_compare (step, size_zero_node) < 0;
3469
 
3470
  /* Create an expression for the first address accessed by this load
3471
     in LOOP.  */
3472
  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3473
 
3474
  if (vect_print_dump_info (REPORT_DETAILS))
3475
    {
3476
      tree data_ref_base = base_name;
3477
      fprintf (vect_dump, "create %s-pointer variable to type: ",
3478
               tree_code_name[(int) TREE_CODE (aggr_type)]);
3479
      print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3480
      if (TREE_CODE (data_ref_base) == VAR_DECL
3481
          || TREE_CODE (data_ref_base) == ARRAY_REF)
3482
        fprintf (vect_dump, "  vectorizing an array ref: ");
3483
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3484
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
3485
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
3486
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
3487
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
3488
    }
3489
 
3490
  /* (1) Create the new aggregate-pointer variable.  */
3491
  aggr_ptr_type = build_pointer_type (aggr_type);
3492
  base = get_base_address (DR_REF (dr));
3493
  if (base
3494
      && TREE_CODE (base) == MEM_REF)
3495
    aggr_ptr_type
3496
      = build_qualified_type (aggr_ptr_type,
3497
                              TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3498
  aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3499
                                    get_name (base_name));
3500
 
3501
  /* Vector and array types inherit the alias set of their component
3502
     type by default so we need to use a ref-all pointer if the data
3503
     reference does not conflict with the created aggregated data
3504
     reference because it is not addressable.  */
3505
  if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3506
                              get_alias_set (DR_REF (dr))))
3507
    {
3508
      aggr_ptr_type
3509
        = build_pointer_type_for_mode (aggr_type,
3510
                                       TYPE_MODE (aggr_ptr_type), true);
3511
      aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3512
                                        get_name (base_name));
3513
    }
3514
 
3515
  /* Likewise for any of the data references in the stmt group.  */
3516
  else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3517
    {
3518
      gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3519
      do
3520
        {
3521
          tree lhs = gimple_assign_lhs (orig_stmt);
3522
          if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3523
                                      get_alias_set (lhs)))
3524
            {
3525
              aggr_ptr_type
3526
                = build_pointer_type_for_mode (aggr_type,
3527
                                               TYPE_MODE (aggr_ptr_type), true);
3528
              aggr_ptr
3529
                = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3530
                                         get_name (base_name));
3531
              break;
3532
            }
3533
 
3534
          orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3535
        }
3536
      while (orig_stmt);
3537
    }
3538
 
3539
  add_referenced_var (aggr_ptr);
3540
 
3541
  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3542
     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3543
     def-use update cycles for the pointer: one relative to the outer-loop
3544
     (LOOP), which is what steps (3) and (4) below do.  The other is relative
3545
     to the inner-loop (which is the inner-most loop containing the dataref),
3546
     and this is done be step (5) below.
3547
 
3548
     When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3549
     inner-most loop, and so steps (3),(4) work the same, and step (5) is
3550
     redundant.  Steps (3),(4) create the following:
3551
 
3552
        vp0 = &base_addr;
3553
        LOOP:   vp1 = phi(vp0,vp2)
3554
                ...
3555
                ...
3556
                vp2 = vp1 + step
3557
                goto LOOP
3558
 
3559
     If there is an inner-loop nested in loop, then step (5) will also be
3560
     applied, and an additional update in the inner-loop will be created:
3561
 
3562
        vp0 = &base_addr;
3563
        LOOP:   vp1 = phi(vp0,vp2)
3564
                ...
3565
        inner:     vp3 = phi(vp1,vp4)
3566
                   vp4 = vp3 + inner_step
3567
                   if () goto inner
3568
                ...
3569
                vp2 = vp1 + step
3570
                if () goto LOOP   */
3571
 
3572
  /* (2) Calculate the initial address of the aggregate-pointer, and set
3573
     the aggregate-pointer to point to it before the loop.  */
3574
 
3575
  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
3576
 
3577
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3578
                                                   offset, loop);
3579
  if (new_stmt_list)
3580
    {
3581
      if (pe)
3582
        {
3583
          new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3584
          gcc_assert (!new_bb);
3585
        }
3586
      else
3587
        gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3588
    }
3589
 
3590
  *initial_address = new_temp;
3591
 
3592
  /* Create: p = (aggr_type *) initial_base  */
3593
  if (TREE_CODE (new_temp) != SSA_NAME
3594
      || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3595
    {
3596
      vec_stmt = gimple_build_assign (aggr_ptr,
3597
                                      fold_convert (aggr_ptr_type, new_temp));
3598
      aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3599
      /* Copy the points-to information if it exists. */
3600
      if (DR_PTR_INFO (dr))
3601
        duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3602
      gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3603
      if (pe)
3604
        {
3605
          new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3606
          gcc_assert (!new_bb);
3607
        }
3608
      else
3609
        gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3610
    }
3611
  else
3612
    aggr_ptr_init = new_temp;
3613
 
3614
  /* (3) Handle the updating of the aggregate-pointer inside the loop.
3615
     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3616
     inner-loop nested in LOOP (during outer-loop vectorization).  */
3617
 
3618
  /* No update in loop is required.  */
3619
  if (only_init && (!loop_vinfo || at_loop == loop))
3620
    aptr = aggr_ptr_init;
3621
  else
3622
    {
3623
      /* The step of the aggregate pointer is the type size.  */
3624
      tree step = TYPE_SIZE_UNIT (aggr_type);
3625
      /* One exception to the above is when the scalar step of the load in
3626
         LOOP is zero. In this case the step here is also zero.  */
3627
      if (*inv_p)
3628
        step = size_zero_node;
3629
      else if (negative)
3630
        step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3631
 
3632
      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3633
 
3634
      create_iv (aggr_ptr_init,
3635
                 fold_convert (aggr_ptr_type, step),
3636
                 aggr_ptr, loop, &incr_gsi, insert_after,
3637
                 &indx_before_incr, &indx_after_incr);
3638
      incr = gsi_stmt (incr_gsi);
3639
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3640
 
3641
      /* Copy the points-to information if it exists. */
3642
      if (DR_PTR_INFO (dr))
3643
        {
3644
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3645
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3646
        }
3647
      if (ptr_incr)
3648
        *ptr_incr = incr;
3649
 
3650
      aptr = indx_before_incr;
3651
    }
3652
 
3653
  if (!nested_in_vect_loop || only_init)
3654
    return aptr;
3655
 
3656
 
3657
  /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3658
     nested in LOOP, if exists.  */
3659
 
3660
  gcc_assert (nested_in_vect_loop);
3661
  if (!only_init)
3662
    {
3663
      standard_iv_increment_position (containing_loop, &incr_gsi,
3664
                                      &insert_after);
3665
      create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3666
                 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3667
                 &indx_after_incr);
3668
      incr = gsi_stmt (incr_gsi);
3669
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3670
 
3671
      /* Copy the points-to information if it exists. */
3672
      if (DR_PTR_INFO (dr))
3673
        {
3674
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3675
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3676
        }
3677
      if (ptr_incr)
3678
        *ptr_incr = incr;
3679
 
3680
      return indx_before_incr;
3681
    }
3682
  else
3683
    gcc_unreachable ();
3684
}
3685
 
3686
 
3687
/* Function bump_vector_ptr
3688
 
3689
   Increment a pointer (to a vector type) by vector-size. If requested,
3690
   i.e. if PTR-INCR is given, then also connect the new increment stmt
3691
   to the existing def-use update-chain of the pointer, by modifying
3692
   the PTR_INCR as illustrated below:
3693
 
3694
   The pointer def-use update-chain before this function:
3695
                        DATAREF_PTR = phi (p_0, p_2)
3696
                        ....
3697
        PTR_INCR:       p_2 = DATAREF_PTR + step
3698
 
3699
   The pointer def-use update-chain after this function:
3700
                        DATAREF_PTR = phi (p_0, p_2)
3701
                        ....
3702
                        NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3703
                        ....
3704
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
3705
 
3706
   Input:
3707
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3708
                 in the loop.
3709
   PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3710
              the loop.  The increment amount across iterations is expected
3711
              to be vector_size.
3712
   BSI - location where the new update stmt is to be placed.
3713
   STMT - the original scalar memory-access stmt that is being vectorized.
3714
   BUMP - optional. The offset by which to bump the pointer. If not given,
3715
          the offset is assumed to be vector_size.
3716
 
3717
   Output: Return NEW_DATAREF_PTR as illustrated above.
3718
 
3719
*/
3720
 
3721
tree
3722
bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3723
                 gimple stmt, tree bump)
3724
{
3725
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3726
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3727
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3728
  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3729
  tree update = TYPE_SIZE_UNIT (vectype);
3730
  gimple incr_stmt;
3731
  ssa_op_iter iter;
3732
  use_operand_p use_p;
3733
  tree new_dataref_ptr;
3734
 
3735
  if (bump)
3736
    update = bump;
3737
 
3738
  incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3739
                                            dataref_ptr, update);
3740
  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3741
  gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3742
  vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3743
 
3744
  /* Copy the points-to information if it exists. */
3745
  if (DR_PTR_INFO (dr))
3746
    {
3747
      duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3748
      SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3749
      SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3750
    }
3751
 
3752
  if (!ptr_incr)
3753
    return new_dataref_ptr;
3754
 
3755
  /* Update the vector-pointer's cross-iteration increment.  */
3756
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3757
    {
3758
      tree use = USE_FROM_PTR (use_p);
3759
 
3760
      if (use == dataref_ptr)
3761
        SET_USE (use_p, new_dataref_ptr);
3762
      else
3763
        gcc_assert (tree_int_cst_compare (use, update) == 0);
3764
    }
3765
 
3766
  return new_dataref_ptr;
3767
}
3768
 
3769
 
3770
/* Function vect_create_destination_var.
3771
 
3772
   Create a new temporary of type VECTYPE.  */
3773
 
3774
tree
3775
vect_create_destination_var (tree scalar_dest, tree vectype)
3776
{
3777
  tree vec_dest;
3778
  const char *new_name;
3779
  tree type;
3780
  enum vect_var_kind kind;
3781
 
3782
  kind = vectype ? vect_simple_var : vect_scalar_var;
3783
  type = vectype ? vectype : TREE_TYPE (scalar_dest);
3784
 
3785
  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3786
 
3787
  new_name = get_name (scalar_dest);
3788
  if (!new_name)
3789
    new_name = "var_";
3790
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
3791
  add_referenced_var (vec_dest);
3792
 
3793
  return vec_dest;
3794
}
3795
 
3796
/* Function vect_strided_store_supported.
3797
 
3798
   Returns TRUE if interleave high and interleave low permutations
3799
   are supported, and FALSE otherwise.  */
3800
 
3801
bool
3802
vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3803
{
3804
  enum machine_mode mode = TYPE_MODE (vectype);
3805
 
3806
  /* vect_permute_store_chain requires the group size to be a power of two.  */
3807
  if (exact_log2 (count) == -1)
3808
    {
3809
      if (vect_print_dump_info (REPORT_DETAILS))
3810
        fprintf (vect_dump, "the size of the group of strided accesses"
3811
                 " is not a power of 2");
3812
      return false;
3813
    }
3814
 
3815
  /* Check that the permutation is supported.  */
3816
  if (VECTOR_MODE_P (mode))
3817
    {
3818
      unsigned int i, nelt = GET_MODE_NUNITS (mode);
3819
      unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3820
      for (i = 0; i < nelt / 2; i++)
3821
        {
3822
          sel[i * 2] = i;
3823
          sel[i * 2 + 1] = i + nelt;
3824
        }
3825
      if (can_vec_perm_p (mode, false, sel))
3826
        {
3827
          for (i = 0; i < nelt; i++)
3828
            sel[i] += nelt / 2;
3829
          if (can_vec_perm_p (mode, false, sel))
3830
            return true;
3831
        }
3832
    }
3833
 
3834
  if (vect_print_dump_info (REPORT_DETAILS))
3835
    fprintf (vect_dump, "interleave op not supported by target.");
3836
  return false;
3837
}
3838
 
3839
 
3840
/* Return TRUE if vec_store_lanes is available for COUNT vectors of
3841
   type VECTYPE.  */
3842
 
3843
bool
3844
vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3845
{
3846
  return vect_lanes_optab_supported_p ("vec_store_lanes",
3847
                                       vec_store_lanes_optab,
3848
                                       vectype, count);
3849
}
3850
 
3851
 
3852
/* Function vect_permute_store_chain.
3853
 
3854
   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3855
   a power of 2, generate interleave_high/low stmts to reorder the data
3856
   correctly for the stores.  Return the final references for stores in
3857
   RESULT_CHAIN.
3858
 
3859
   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3860
   The input is 4 vectors each containing 8 elements.  We assign a number to
3861
   each element, the input sequence is:
3862
 
3863
   1st vec:   0  1  2  3  4  5  6  7
3864
   2nd vec:   8  9 10 11 12 13 14 15
3865
   3rd vec:  16 17 18 19 20 21 22 23
3866
   4th vec:  24 25 26 27 28 29 30 31
3867
 
3868
   The output sequence should be:
3869
 
3870
   1st vec:  0  8 16 24  1  9 17 25
3871
   2nd vec:  2 10 18 26  3 11 19 27
3872
   3rd vec:  4 12 20 28  5 13 21 30
3873
   4th vec:  6 14 22 30  7 15 23 31
3874
 
3875
   i.e., we interleave the contents of the four vectors in their order.
3876
 
3877
   We use interleave_high/low instructions to create such output.  The input of
3878
   each interleave_high/low operation is two vectors:
3879
   1st vec    2nd vec
3880
 
3881
   the even elements of the result vector are obtained left-to-right from the
3882
   high/low elements of the first vector.  The odd elements of the result are
3883
   obtained left-to-right from the high/low elements of the second vector.
3884
   The output of interleave_high will be:   0 4 1 5
3885
   and of interleave_low:                   2 6 3 7
3886
 
3887
 
3888
   The permutation is done in log LENGTH stages.  In each stage interleave_high
3889
   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3890
   where the first argument is taken from the first half of DR_CHAIN and the
3891
   second argument from it's second half.
3892
   In our example,
3893
 
3894
   I1: interleave_high (1st vec, 3rd vec)
3895
   I2: interleave_low (1st vec, 3rd vec)
3896
   I3: interleave_high (2nd vec, 4th vec)
3897
   I4: interleave_low (2nd vec, 4th vec)
3898
 
3899
   The output for the first stage is:
3900
 
3901
   I1:  0 16  1 17  2 18  3 19
3902
   I2:  4 20  5 21  6 22  7 23
3903
   I3:  8 24  9 25 10 26 11 27
3904
   I4: 12 28 13 29 14 30 15 31
3905
 
3906
   The output of the second stage, i.e. the final result is:
3907
 
3908
   I1:  0  8 16 24  1  9 17 25
3909
   I2:  2 10 18 26  3 11 19 27
3910
   I3:  4 12 20 28  5 13 21 30
3911
   I4:  6 14 22 30  7 15 23 31.  */
3912
 
3913
void
3914
vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3915
                          unsigned int length,
3916
                          gimple stmt,
3917
                          gimple_stmt_iterator *gsi,
3918
                          VEC(tree,heap) **result_chain)
3919
{
3920
  tree perm_dest, vect1, vect2, high, low;
3921
  gimple perm_stmt;
3922
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3923
  tree perm_mask_low, perm_mask_high;
3924
  unsigned int i, n;
3925
  unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
3926
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3927
 
3928
  *result_chain = VEC_copy (tree, heap, dr_chain);
3929
 
3930
  for (i = 0, n = nelt / 2; i < n; i++)
3931
    {
3932
      sel[i * 2] = i;
3933
      sel[i * 2 + 1] = i + nelt;
3934
    }
3935
  perm_mask_high = vect_gen_perm_mask (vectype, sel);
3936
  gcc_assert (perm_mask_high != NULL);
3937
 
3938
  for (i = 0; i < nelt; i++)
3939
    sel[i] += nelt / 2;
3940
  perm_mask_low = vect_gen_perm_mask (vectype, sel);
3941
  gcc_assert (perm_mask_low != NULL);
3942
 
3943
  for (i = 0, n = exact_log2 (length); i < n; i++)
3944
    {
3945
      for (j = 0; j < length/2; j++)
3946
        {
3947
          vect1 = VEC_index (tree, dr_chain, j);
3948
          vect2 = VEC_index (tree, dr_chain, j+length/2);
3949
 
3950
          /* Create interleaving stmt:
3951
             high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}>  */
3952
          perm_dest = create_tmp_var (vectype, "vect_inter_high");
3953
          DECL_GIMPLE_REG_P (perm_dest) = 1;
3954
          add_referenced_var (perm_dest);
3955
          high = make_ssa_name (perm_dest, NULL);
3956
          perm_stmt
3957
            = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
3958
                                             vect1, vect2, perm_mask_high);
3959
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3960
          VEC_replace (tree, *result_chain, 2*j, high);
3961
 
3962
          /* Create interleaving stmt:
3963
             low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
3964
                                                 nelt*3/2+1, ...}>  */
3965
          perm_dest = create_tmp_var (vectype, "vect_inter_low");
3966
          DECL_GIMPLE_REG_P (perm_dest) = 1;
3967
          add_referenced_var (perm_dest);
3968
          low = make_ssa_name (perm_dest, NULL);
3969
          perm_stmt
3970
            = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
3971
                                             vect1, vect2, perm_mask_low);
3972
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3973
          VEC_replace (tree, *result_chain, 2*j+1, low);
3974
        }
3975
      dr_chain = VEC_copy (tree, heap, *result_chain);
3976
    }
3977
}
3978
 
3979
/* Function vect_setup_realignment
3980
 
3981
   This function is called when vectorizing an unaligned load using
3982
   the dr_explicit_realign[_optimized] scheme.
3983
   This function generates the following code at the loop prolog:
3984
 
3985
      p = initial_addr;
3986
   x  msq_init = *(floor(p));   # prolog load
3987
      realignment_token = call target_builtin;
3988
    loop:
3989
   x  msq = phi (msq_init, ---)
3990
 
3991
   The stmts marked with x are generated only for the case of
3992
   dr_explicit_realign_optimized.
3993
 
3994
   The code above sets up a new (vector) pointer, pointing to the first
3995
   location accessed by STMT, and a "floor-aligned" load using that pointer.
3996
   It also generates code to compute the "realignment-token" (if the relevant
3997
   target hook was defined), and creates a phi-node at the loop-header bb
3998
   whose arguments are the result of the prolog-load (created by this
3999
   function) and the result of a load that takes place in the loop (to be
4000
   created by the caller to this function).
4001
 
4002
   For the case of dr_explicit_realign_optimized:
4003
   The caller to this function uses the phi-result (msq) to create the
4004
   realignment code inside the loop, and sets up the missing phi argument,
4005
   as follows:
4006
    loop:
4007
      msq = phi (msq_init, lsq)
4008
      lsq = *(floor(p'));        # load in loop
4009
      result = realign_load (msq, lsq, realignment_token);
4010
 
4011
   For the case of dr_explicit_realign:
4012
    loop:
4013
      msq = *(floor(p));        # load in loop
4014
      p' = p + (VS-1);
4015
      lsq = *(floor(p'));       # load in loop
4016
      result = realign_load (msq, lsq, realignment_token);
4017
 
4018
   Input:
4019
   STMT - (scalar) load stmt to be vectorized. This load accesses
4020
          a memory location that may be unaligned.
4021
   BSI - place where new code is to be inserted.
4022
   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4023
                              is used.
4024
 
4025
   Output:
4026
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4027
                       target hook, if defined.
4028
   Return value - the result of the loop-header phi node.  */
4029
 
4030
tree
4031
vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4032
                        tree *realignment_token,
4033
                        enum dr_alignment_support alignment_support_scheme,
4034
                        tree init_addr,
4035
                        struct loop **at_loop)
4036
{
4037
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4038
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4039
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4040
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4041
  struct loop *loop = NULL;
4042
  edge pe = NULL;
4043
  tree scalar_dest = gimple_assign_lhs (stmt);
4044
  tree vec_dest;
4045
  gimple inc;
4046
  tree ptr;
4047
  tree data_ref;
4048
  gimple new_stmt;
4049
  basic_block new_bb;
4050
  tree msq_init = NULL_TREE;
4051
  tree new_temp;
4052
  gimple phi_stmt;
4053
  tree msq = NULL_TREE;
4054
  gimple_seq stmts = NULL;
4055
  bool inv_p;
4056
  bool compute_in_loop = false;
4057
  bool nested_in_vect_loop = false;
4058
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4059
  struct loop *loop_for_initial_load = NULL;
4060
 
4061
  if (loop_vinfo)
4062
    {
4063
      loop = LOOP_VINFO_LOOP (loop_vinfo);
4064
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4065
    }
4066
 
4067
  gcc_assert (alignment_support_scheme == dr_explicit_realign
4068
              || alignment_support_scheme == dr_explicit_realign_optimized);
4069
 
4070
  /* We need to generate three things:
4071
     1. the misalignment computation
4072
     2. the extra vector load (for the optimized realignment scheme).
4073
     3. the phi node for the two vectors from which the realignment is
4074
      done (for the optimized realignment scheme).  */
4075
 
4076
  /* 1. Determine where to generate the misalignment computation.
4077
 
4078
     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4079
     calculation will be generated by this function, outside the loop (in the
4080
     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
4081
     caller, inside the loop.
4082
 
4083
     Background: If the misalignment remains fixed throughout the iterations of
4084
     the loop, then both realignment schemes are applicable, and also the
4085
     misalignment computation can be done outside LOOP.  This is because we are
4086
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4087
     are a multiple of VS (the Vector Size), and therefore the misalignment in
4088
     different vectorized LOOP iterations is always the same.
4089
     The problem arises only if the memory access is in an inner-loop nested
4090
     inside LOOP, which is now being vectorized using outer-loop vectorization.
4091
     This is the only case when the misalignment of the memory access may not
4092
     remain fixed throughout the iterations of the inner-loop (as explained in
4093
     detail in vect_supportable_dr_alignment).  In this case, not only is the
4094
     optimized realignment scheme not applicable, but also the misalignment
4095
     computation (and generation of the realignment token that is passed to
4096
     REALIGN_LOAD) have to be done inside the loop.
4097
 
4098
     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4099
     or not, which in turn determines if the misalignment is computed inside
4100
     the inner-loop, or outside LOOP.  */
4101
 
4102
  if (init_addr != NULL_TREE || !loop_vinfo)
4103
    {
4104
      compute_in_loop = true;
4105
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
4106
    }
4107
 
4108
 
4109
  /* 2. Determine where to generate the extra vector load.
4110
 
4111
     For the optimized realignment scheme, instead of generating two vector
4112
     loads in each iteration, we generate a single extra vector load in the
4113
     preheader of the loop, and in each iteration reuse the result of the
4114
     vector load from the previous iteration.  In case the memory access is in
4115
     an inner-loop nested inside LOOP, which is now being vectorized using
4116
     outer-loop vectorization, we need to determine whether this initial vector
4117
     load should be generated at the preheader of the inner-loop, or can be
4118
     generated at the preheader of LOOP.  If the memory access has no evolution
4119
     in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4120
     to be generated inside LOOP (in the preheader of the inner-loop).  */
4121
 
4122
  if (nested_in_vect_loop)
4123
    {
4124
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4125
      bool invariant_in_outerloop =
4126
            (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4127
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4128
    }
4129
  else
4130
    loop_for_initial_load = loop;
4131
  if (at_loop)
4132
    *at_loop = loop_for_initial_load;
4133
 
4134
  if (loop_for_initial_load)
4135
    pe = loop_preheader_edge (loop_for_initial_load);
4136
 
4137
  /* 3. For the case of the optimized realignment, create the first vector
4138
      load at the loop preheader.  */
4139
 
4140
  if (alignment_support_scheme == dr_explicit_realign_optimized)
4141
    {
4142
      /* Create msq_init = *(floor(p1)) in the loop preheader  */
4143
 
4144
      gcc_assert (!compute_in_loop);
4145
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
4146
      ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4147
                                      NULL_TREE, &init_addr, NULL, &inc,
4148
                                      true, &inv_p);
4149
      new_stmt = gimple_build_assign_with_ops
4150
                   (BIT_AND_EXPR, NULL_TREE, ptr,
4151
                    build_int_cst (TREE_TYPE (ptr),
4152
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4153
      new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
4154
      gimple_assign_set_lhs (new_stmt, new_temp);
4155
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4156
      gcc_assert (!new_bb);
4157
      data_ref
4158
        = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4159
                  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4160
      new_stmt = gimple_build_assign (vec_dest, data_ref);
4161
      new_temp = make_ssa_name (vec_dest, new_stmt);
4162
      gimple_assign_set_lhs (new_stmt, new_temp);
4163
      mark_symbols_for_renaming (new_stmt);
4164
      if (pe)
4165
        {
4166
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4167
          gcc_assert (!new_bb);
4168
        }
4169
      else
4170
         gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4171
 
4172
      msq_init = gimple_assign_lhs (new_stmt);
4173
    }
4174
 
4175
  /* 4. Create realignment token using a target builtin, if available.
4176
      It is done either inside the containing loop, or before LOOP (as
4177
      determined above).  */
4178
 
4179
  if (targetm.vectorize.builtin_mask_for_load)
4180
    {
4181
      tree builtin_decl;
4182
 
4183
      /* Compute INIT_ADDR - the initial addressed accessed by this memref.  */
4184
      if (!init_addr)
4185
        {
4186
          /* Generate the INIT_ADDR computation outside LOOP.  */
4187
          init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4188
                                                        NULL_TREE, loop);
4189
          if (loop)
4190
            {
4191
              pe = loop_preheader_edge (loop);
4192
              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4193
              gcc_assert (!new_bb);
4194
            }
4195
          else
4196
             gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4197
        }
4198
 
4199
      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4200
      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4201
      vec_dest =
4202
        vect_create_destination_var (scalar_dest,
4203
                                     gimple_call_return_type (new_stmt));
4204
      new_temp = make_ssa_name (vec_dest, new_stmt);
4205
      gimple_call_set_lhs (new_stmt, new_temp);
4206
 
4207
      if (compute_in_loop)
4208
        gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4209
      else
4210
        {
4211
          /* Generate the misalignment computation outside LOOP.  */
4212
          pe = loop_preheader_edge (loop);
4213
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4214
          gcc_assert (!new_bb);
4215
        }
4216
 
4217
      *realignment_token = gimple_call_lhs (new_stmt);
4218
 
4219
      /* The result of the CALL_EXPR to this builtin is determined from
4220
         the value of the parameter and no global variables are touched
4221
         which makes the builtin a "const" function.  Requiring the
4222
         builtin to have the "const" attribute makes it unnecessary
4223
         to call mark_call_clobbered.  */
4224
      gcc_assert (TREE_READONLY (builtin_decl));
4225
    }
4226
 
4227
  if (alignment_support_scheme == dr_explicit_realign)
4228
    return msq;
4229
 
4230
  gcc_assert (!compute_in_loop);
4231
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4232
 
4233
 
4234
  /* 5. Create msq = phi <msq_init, lsq> in loop  */
4235
 
4236
  pe = loop_preheader_edge (containing_loop);
4237
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
4238
  msq = make_ssa_name (vec_dest, NULL);
4239
  phi_stmt = create_phi_node (msq, containing_loop->header);
4240
  SSA_NAME_DEF_STMT (msq) = phi_stmt;
4241
  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4242
 
4243
  return msq;
4244
}
4245
 
4246
 
4247
/* Function vect_strided_load_supported.
4248
 
4249
   Returns TRUE if even and odd permutations are supported,
4250
   and FALSE otherwise.  */
4251
 
4252
bool
4253
vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4254
{
4255
  enum machine_mode mode = TYPE_MODE (vectype);
4256
 
4257
  /* vect_permute_load_chain requires the group size to be a power of two.  */
4258
  if (exact_log2 (count) == -1)
4259
    {
4260
      if (vect_print_dump_info (REPORT_DETAILS))
4261
        fprintf (vect_dump, "the size of the group of strided accesses"
4262
                 " is not a power of 2");
4263
      return false;
4264
    }
4265
 
4266
  /* Check that the permutation is supported.  */
4267
  if (VECTOR_MODE_P (mode))
4268
    {
4269
      unsigned int i, nelt = GET_MODE_NUNITS (mode);
4270
      unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4271
 
4272
      for (i = 0; i < nelt; i++)
4273
        sel[i] = i * 2;
4274
      if (can_vec_perm_p (mode, false, sel))
4275
        {
4276
          for (i = 0; i < nelt; i++)
4277
            sel[i] = i * 2 + 1;
4278
          if (can_vec_perm_p (mode, false, sel))
4279
            return true;
4280
        }
4281
    }
4282
 
4283
  if (vect_print_dump_info (REPORT_DETAILS))
4284
    fprintf (vect_dump, "extract even/odd not supported by target");
4285
  return false;
4286
}
4287
 
4288
/* Return TRUE if vec_load_lanes is available for COUNT vectors of
4289
   type VECTYPE.  */
4290
 
4291
bool
4292
vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4293
{
4294
  return vect_lanes_optab_supported_p ("vec_load_lanes",
4295
                                       vec_load_lanes_optab,
4296
                                       vectype, count);
4297
}
4298
 
4299
/* Function vect_permute_load_chain.
4300
 
4301
   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4302
   a power of 2, generate extract_even/odd stmts to reorder the input data
4303
   correctly.  Return the final references for loads in RESULT_CHAIN.
4304
 
4305
   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4306
   The input is 4 vectors each containing 8 elements. We assign a number to each
4307
   element, the input sequence is:
4308
 
4309
   1st vec:   0  1  2  3  4  5  6  7
4310
   2nd vec:   8  9 10 11 12 13 14 15
4311
   3rd vec:  16 17 18 19 20 21 22 23
4312
   4th vec:  24 25 26 27 28 29 30 31
4313
 
4314
   The output sequence should be:
4315
 
4316
   1st vec:  0 4  8 12 16 20 24 28
4317
   2nd vec:  1 5  9 13 17 21 25 29
4318
   3rd vec:  2 6 10 14 18 22 26 30
4319
   4th vec:  3 7 11 15 19 23 27 31
4320
 
4321
   i.e., the first output vector should contain the first elements of each
4322
   interleaving group, etc.
4323
 
4324
   We use extract_even/odd instructions to create such output.  The input of
4325
   each extract_even/odd operation is two vectors
4326
   1st vec    2nd vec
4327
 
4328
 
4329
   and the output is the vector of extracted even/odd elements.  The output of
4330
   extract_even will be:   0 2 4 6
4331
   and of extract_odd:     1 3 5 7
4332
 
4333
 
4334
   The permutation is done in log LENGTH stages.  In each stage extract_even
4335
   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4336
   their order.  In our example,
4337
 
4338
   E1: extract_even (1st vec, 2nd vec)
4339
   E2: extract_odd (1st vec, 2nd vec)
4340
   E3: extract_even (3rd vec, 4th vec)
4341
   E4: extract_odd (3rd vec, 4th vec)
4342
 
4343
   The output for the first stage will be:
4344
 
4345
   E1:  0  2  4  6  8 10 12 14
4346
   E2:  1  3  5  7  9 11 13 15
4347
   E3: 16 18 20 22 24 26 28 30
4348
   E4: 17 19 21 23 25 27 29 31
4349
 
4350
   In order to proceed and create the correct sequence for the next stage (or
4351
   for the correct output, if the second stage is the last one, as in our
4352
   example), we first put the output of extract_even operation and then the
4353
   output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4354
   The input for the second stage is:
4355
 
4356
   1st vec (E1):  0  2  4  6  8 10 12 14
4357
   2nd vec (E3): 16 18 20 22 24 26 28 30
4358
   3rd vec (E2):  1  3  5  7  9 11 13 15
4359
   4th vec (E4): 17 19 21 23 25 27 29 31
4360
 
4361
   The output of the second stage:
4362
 
4363
   E1: 0 4  8 12 16 20 24 28
4364
   E2: 2 6 10 14 18 22 26 30
4365
   E3: 1 5  9 13 17 21 25 29
4366
   E4: 3 7 11 15 19 23 27 31
4367
 
4368
   And RESULT_CHAIN after reordering:
4369
 
4370
   1st vec (E1):  0 4  8 12 16 20 24 28
4371
   2nd vec (E3):  1 5  9 13 17 21 25 29
4372
   3rd vec (E2):  2 6 10 14 18 22 26 30
4373
   4th vec (E4):  3 7 11 15 19 23 27 31.  */
4374
 
4375
static void
4376
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4377
                         unsigned int length,
4378
                         gimple stmt,
4379
                         gimple_stmt_iterator *gsi,
4380
                         VEC(tree,heap) **result_chain)
4381
{
4382
  tree perm_dest, data_ref, first_vect, second_vect;
4383
  tree perm_mask_even, perm_mask_odd;
4384
  gimple perm_stmt;
4385
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4386
  unsigned int i, j, log_length = exact_log2 (length);
4387
  unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4388
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4389
 
4390
  *result_chain = VEC_copy (tree, heap, dr_chain);
4391
 
4392
  for (i = 0; i < nelt; ++i)
4393
    sel[i] = i * 2;
4394
  perm_mask_even = vect_gen_perm_mask (vectype, sel);
4395
  gcc_assert (perm_mask_even != NULL);
4396
 
4397
  for (i = 0; i < nelt; ++i)
4398
    sel[i] = i * 2 + 1;
4399
  perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4400
  gcc_assert (perm_mask_odd != NULL);
4401
 
4402
  for (i = 0; i < log_length; i++)
4403
    {
4404
      for (j = 0; j < length; j += 2)
4405
        {
4406
          first_vect = VEC_index (tree, dr_chain, j);
4407
          second_vect = VEC_index (tree, dr_chain, j+1);
4408
 
4409
          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
4410
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
4411
          DECL_GIMPLE_REG_P (perm_dest) = 1;
4412
          add_referenced_var (perm_dest);
4413
 
4414
          perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4415
                                                     first_vect, second_vect,
4416
                                                     perm_mask_even);
4417
 
4418
          data_ref = make_ssa_name (perm_dest, perm_stmt);
4419
          gimple_assign_set_lhs (perm_stmt, data_ref);
4420
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4421
          mark_symbols_for_renaming (perm_stmt);
4422
 
4423
          VEC_replace (tree, *result_chain, j/2, data_ref);
4424
 
4425
          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
4426
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4427
          DECL_GIMPLE_REG_P (perm_dest) = 1;
4428
          add_referenced_var (perm_dest);
4429
 
4430
          perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4431
                                                     first_vect, second_vect,
4432
                                                     perm_mask_odd);
4433
 
4434
          data_ref = make_ssa_name (perm_dest, perm_stmt);
4435
          gimple_assign_set_lhs (perm_stmt, data_ref);
4436
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4437
          mark_symbols_for_renaming (perm_stmt);
4438
 
4439
          VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4440
        }
4441
      dr_chain = VEC_copy (tree, heap, *result_chain);
4442
    }
4443
}
4444
 
4445
 
4446
/* Function vect_transform_strided_load.
4447
 
4448
   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4449
   to perform their permutation and ascribe the result vectorized statements to
4450
   the scalar statements.
4451
*/
4452
 
4453
void
4454
vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4455
                             gimple_stmt_iterator *gsi)
4456
{
4457
  VEC(tree,heap) *result_chain = NULL;
4458
 
4459
  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4460
     RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4461
     vectors, that are ready for vector computation.  */
4462
  result_chain = VEC_alloc (tree, heap, size);
4463
  vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4464
  vect_record_strided_load_vectors (stmt, result_chain);
4465
  VEC_free (tree, heap, result_chain);
4466
}
4467
 
4468
/* RESULT_CHAIN contains the output of a group of strided loads that were
4469
   generated as part of the vectorization of STMT.  Assign the statement
4470
   for each vector to the associated scalar statement.  */
4471
 
4472
void
4473
vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4474
{
4475
  gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4476
  gimple next_stmt, new_stmt;
4477
  unsigned int i, gap_count;
4478
  tree tmp_data_ref;
4479
 
4480
  /* Put a permuted data-ref in the VECTORIZED_STMT field.
4481
     Since we scan the chain starting from it's first node, their order
4482
     corresponds the order of data-refs in RESULT_CHAIN.  */
4483
  next_stmt = first_stmt;
4484
  gap_count = 1;
4485
  FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4486
    {
4487
      if (!next_stmt)
4488
        break;
4489
 
4490
      /* Skip the gaps.  Loads created for the gaps will be removed by dead
4491
       code elimination pass later.  No need to check for the first stmt in
4492
       the group, since it always exists.
4493
       GROUP_GAP is the number of steps in elements from the previous
4494
       access (if there is no gap GROUP_GAP is 1).  We skip loads that
4495
       correspond to the gaps.  */
4496
      if (next_stmt != first_stmt
4497
          && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4498
      {
4499
        gap_count++;
4500
        continue;
4501
      }
4502
 
4503
      while (next_stmt)
4504
        {
4505
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4506
          /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4507
             copies, and we put the new vector statement in the first available
4508
             RELATED_STMT.  */
4509
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4510
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4511
          else
4512
            {
4513
              if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4514
                {
4515
                  gimple prev_stmt =
4516
                    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4517
                  gimple rel_stmt =
4518
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4519
                  while (rel_stmt)
4520
                    {
4521
                      prev_stmt = rel_stmt;
4522
                      rel_stmt =
4523
                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4524
                    }
4525
 
4526
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4527
                    new_stmt;
4528
                }
4529
            }
4530
 
4531
          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4532
          gap_count = 1;
4533
          /* If NEXT_STMT accesses the same DR as the previous statement,
4534
             put the same TMP_DATA_REF as its vectorized statement; otherwise
4535
             get the next data-ref from RESULT_CHAIN.  */
4536
          if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4537
            break;
4538
        }
4539
    }
4540
}
4541
 
4542
/* Function vect_force_dr_alignment_p.
4543
 
4544
   Returns whether the alignment of a DECL can be forced to be aligned
4545
   on ALIGNMENT bit boundary.  */
4546
 
4547
bool
4548
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4549
{
4550
  if (TREE_CODE (decl) != VAR_DECL)
4551
    return false;
4552
 
4553
  if (DECL_EXTERNAL (decl))
4554
    return false;
4555
 
4556
  if (TREE_ASM_WRITTEN (decl))
4557
    return false;
4558
 
4559
  if (TREE_STATIC (decl))
4560
    return (alignment <= MAX_OFILE_ALIGNMENT);
4561
  else
4562
    return (alignment <= MAX_STACK_ALIGNMENT);
4563
}
4564
 
4565
 
4566
/* Return whether the data reference DR is supported with respect to its
4567
   alignment.
4568
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4569
   it is aligned, i.e., check if it is possible to vectorize it with different
4570
   alignment.  */
4571
 
4572
enum dr_alignment_support
4573
vect_supportable_dr_alignment (struct data_reference *dr,
4574
                               bool check_aligned_accesses)
4575
{
4576
  gimple stmt = DR_STMT (dr);
4577
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4578
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4579
  enum machine_mode mode = TYPE_MODE (vectype);
4580
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4581
  struct loop *vect_loop = NULL;
4582
  bool nested_in_vect_loop = false;
4583
 
4584
  if (aligned_access_p (dr) && !check_aligned_accesses)
4585
    return dr_aligned;
4586
 
4587
  if (loop_vinfo)
4588
    {
4589
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4590
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4591
    }
4592
 
4593
  /* Possibly unaligned access.  */
4594
 
4595
  /* We can choose between using the implicit realignment scheme (generating
4596
     a misaligned_move stmt) and the explicit realignment scheme (generating
4597
     aligned loads with a REALIGN_LOAD).  There are two variants to the
4598
     explicit realignment scheme: optimized, and unoptimized.
4599
     We can optimize the realignment only if the step between consecutive
4600
     vector loads is equal to the vector size.  Since the vector memory
4601
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4602
     is guaranteed that the misalignment amount remains the same throughout the
4603
     execution of the vectorized loop.  Therefore, we can create the
4604
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4605
     at the loop preheader.
4606
 
4607
     However, in the case of outer-loop vectorization, when vectorizing a
4608
     memory access in the inner-loop nested within the LOOP that is now being
4609
     vectorized, while it is guaranteed that the misalignment of the
4610
     vectorized memory access will remain the same in different outer-loop
4611
     iterations, it is *not* guaranteed that is will remain the same throughout
4612
     the execution of the inner-loop.  This is because the inner-loop advances
4613
     with the original scalar step (and not in steps of VS).  If the inner-loop
4614
     step happens to be a multiple of VS, then the misalignment remains fixed
4615
     and we can use the optimized realignment scheme.  For example:
4616
 
4617
      for (i=0; i<N; i++)
4618
        for (j=0; j<M; j++)
4619
          s += a[i+j];
4620
 
4621
     When vectorizing the i-loop in the above example, the step between
4622
     consecutive vector loads is 1, and so the misalignment does not remain
4623
     fixed across the execution of the inner-loop, and the realignment cannot
4624
     be optimized (as illustrated in the following pseudo vectorized loop):
4625
 
4626
      for (i=0; i<N; i+=4)
4627
        for (j=0; j<M; j++){
4628
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4629
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
4630
                         // (assuming that we start from an aligned address).
4631
          }
4632
 
4633
     We therefore have to use the unoptimized realignment scheme:
4634
 
4635
      for (i=0; i<N; i+=4)
4636
          for (j=k; j<M; j+=4)
4637
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4638
                           // that the misalignment of the initial address is
4639
                           // 0).
4640
 
4641
     The loop can then be vectorized as follows:
4642
 
4643
      for (k=0; k<4; k++){
4644
        rt = get_realignment_token (&vp[k]);
4645
        for (i=0; i<N; i+=4){
4646
          v1 = vp[i+k];
4647
          for (j=k; j<M; j+=4){
4648
            v2 = vp[i+j+VS-1];
4649
            va = REALIGN_LOAD <v1,v2,rt>;
4650
            vs += va;
4651
            v1 = v2;
4652
          }
4653
        }
4654
    } */
4655
 
4656
  if (DR_IS_READ (dr))
4657
    {
4658
      bool is_packed = false;
4659
      tree type = (TREE_TYPE (DR_REF (dr)));
4660
 
4661
      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4662
          && (!targetm.vectorize.builtin_mask_for_load
4663
              || targetm.vectorize.builtin_mask_for_load ()))
4664
        {
4665
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4666
          if ((nested_in_vect_loop
4667
               && (TREE_INT_CST_LOW (DR_STEP (dr))
4668
                   != GET_MODE_SIZE (TYPE_MODE (vectype))))
4669
              || !loop_vinfo)
4670
            return dr_explicit_realign;
4671
          else
4672
            return dr_explicit_realign_optimized;
4673
        }
4674
      if (!known_alignment_for_access_p (dr))
4675
        {
4676
          tree ba = DR_BASE_OBJECT (dr);
4677
 
4678
          if (ba)
4679
            is_packed = contains_packed_reference (ba);
4680
        }
4681
 
4682
      if (targetm.vectorize.
4683
          support_vector_misalignment (mode, type,
4684
                                       DR_MISALIGNMENT (dr), is_packed))
4685
        /* Can't software pipeline the loads, but can at least do them.  */
4686
        return dr_unaligned_supported;
4687
    }
4688
  else
4689
    {
4690
      bool is_packed = false;
4691
      tree type = (TREE_TYPE (DR_REF (dr)));
4692
 
4693
      if (!known_alignment_for_access_p (dr))
4694
        {
4695
          tree ba = DR_BASE_OBJECT (dr);
4696
 
4697
          if (ba)
4698
            is_packed = contains_packed_reference (ba);
4699
        }
4700
 
4701
     if (targetm.vectorize.
4702
         support_vector_misalignment (mode, type,
4703
                                      DR_MISALIGNMENT (dr), is_packed))
4704
       return dr_unaligned_supported;
4705
    }
4706
 
4707
  /* Unsupported.  */
4708
  return dr_unaligned_unsupported;
4709
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.